예제 #1
0
    def train(self, x_train, x_val, name=None, epochs=None, batch_size=None, steps_per_epoch=None):
        """Fit the autoencoder on batches produced by ``self.generator``.

        Args:
            x_train: training data handed to ``self.generator``.
            x_val: validation data; used as both input and target.
            name: when given, the model is saved under this name afterwards.
            epochs: number of epochs; ``self.params['epochs']`` when None.
            batch_size: batch size passed to the generator;
                ``self.params['batch_size']`` when None.
            steps_per_epoch: generator steps per epoch;
                ``self.params['steps_per_epoch']`` when None.
        """
        # Fall back to the stored hyper-parameters for anything left unset.
        epochs = self.params['epochs'] if epochs is None else epochs
        batch_size = self.params['batch_size'] if batch_size is None else batch_size
        steps_per_epoch = (self.params['steps_per_epoch']
                           if steps_per_epoch is None else steps_per_epoch)

        history = self.autoencoder.fit_generator(
            self.generator(x_train, batch_size),
            validation_data=(x_val, x_val),
            steps_per_epoch=steps_per_epoch,
            epochs=epochs,
            verbose=2,
        )

        recorded = history.history
        print(recorded.keys())

        if self.plot:
            Plotting().plot_loss(recorded['loss'], recorded['val_loss'], "deep_loss")

        if name is not None:
            self.save_model(name)
def simulate():
    """Chain segments generated by a saved GAN and plot them against test data.

    Log-return data is loaded, every scaled set is reduced to its first and
    last column, and the GAN stored under the MD5 hash of its parameters is
    loaded. ``num_repeats`` generated segments are appended along axis 1,
    rescaled and plotted next to the last test set.
    """
    plotting = Plotting()
    preprocess_logreturns = PreprocessData()
    preprocess_logreturns.enable_log_returns = True

    # 1. load the data; only three of the seven returned values are used here
    (_sets_training, sets_test, sets_training_scaled, sets_test_scaled,
     _training_names, _test_names, _maturities) = preprocess_logreturns.get_data()

    # keep only the first and the last column of each scaled set
    first_and_last = [0, -1]
    sets_training_first_last_tenors = [
        scaled[:, first_and_last] for scaled in sets_training_scaled
    ]
    sets_test_first_last_tenors = [
        scaled[:, first_and_last] for scaled in sets_test_scaled
    ]

    window = 6 * 7  # shared length of the condition, noise and output segments
    gan_params = {
        'num_tenors': sets_training_first_last_tenors[0].shape[1],
        'num_c': window,
        'num_z': window,
        'num_o': window,
        'gen_model_type': 'standard',  # or 'conv'
        'dis_model_type': 'standard',  # or 'conv'
        'gen_layers': (4 * (window * 2),),  # 4 * num_o * num_tenors
        'dis_layers': (4 * window,),  # 4 * num_o
        'gen_last_activation': 'tanh',
        'dis_last_activation': 'sigmoid',
        'loss': 'binary_crossentropy',
        'batch_size': 128,
        'epochs': 20000,
    }
    serialised_params = json.dumps(gan_params, sort_keys=True)
    gan_params_hash = hashlib.md5(serialised_params.encode('utf-8')).hexdigest()

    # The model is only loaded here; it is expected to have been trained and
    # saved under the same name beforehand.
    gan = GAN(gan_params)
    gan.load_model("gan_test_" + gan_params_hash)

    # 4: simulate on the log returns, conditioned on the last test dataset
    num_simulations = 10
    num_repeats = 20
    last_test_set = sets_test_first_last_tenors[-1]
    # NOTE(review): this call passes `data=` while the call in the loop below
    # passes `condition=` -- confirm the keyword against GAN.generate.
    generated_segments, real_segment = gan.generate(data=last_test_set,
                                                    num_simulations=num_simulations,
                                                    remove_condition=False)
    last_generated_segment = generated_segments
    for _ in range(num_repeats - 1):
        # each new segment is conditioned on the previously generated one
        last_generated_segment, _real = gan.generate(
            condition=last_generated_segment, remove_condition=True)
        generated_segments = np.append(generated_segments,
                                       last_generated_segment,
                                       axis=1)

    # 5: undo log-returns, starting from the last row of the last test set
    generated_segments = preprocess_logreturns.rescale_data(
        generated_segments, start_value=last_test_set[-1])
    plotting.plot_3d_training("3d recursively generated with GAN, test",
                              generated_segments,
                              sets_test[-1],
                              show=True,
                              after_real_data=True)
    def all_log_returns(self):
        """Plot every scaled training set as a 2-D time series.

        Data is loaded with log returns enabled; each plot is named after its
        dataset under ``/time_series/`` and is requested with ``save=False``.
        """
        preprocess_data = PreprocessData()
        plotting = Plotting()

        preprocess_data.enable_log_returns = True
        # Only the scaled training sets and their names are needed here.
        (_, _, sets_training_scaled, _,
         training_dataset_names, _, _) = preprocess_data.get_data()

        for i, scaled_set in enumerate(sets_training_scaled):
            print("set_training_scaled.shape", scaled_set.shape, i)
            plot_name = "/time_series/" + training_dataset_names[i]
            plotting.plot_2d(scaled_set,
                             plot_name,
                             timeseries=True,
                             save=False,
                             title=True)
    def __init__(self):
        """Load the log-return datasets and keep views of the first test set."""
        self.preprocess_data = PreprocessData()
        self.plotting = Plotting()
        self.config = Config()

        # Log returns are switched on before the data is loaded.
        self.preprocess_data.enable_log_returns = True
        (self.sets_training,
         self.sets_test,
         self.sets_training_scaled,
         self.sets_test_scaled,
         self.training_dataset_names,
         self.test_dataset_names,
         self.maturities) = self.preprocess_data.get_data()

        # presumably the WTI NYMEX dataset, going by the attribute name below
        first_test_set = self.sets_test[0]
        # The index labels are materialised here but not used afterwards.
        _index_labels = first_test_set.axes[0].tolist()

        # First column of the raw set and first row of its scaled counterpart.
        self.wti_nymex_short_end = first_test_set.iloc[:, 0]
        self.data_scaled = self.sets_test_scaled[0][0]
    def __init__(self):
        """Load the log-return datasets and split the first test set.

        The first test set is cut into a 128-row training part followed by a
        42-row test part; both shapes are printed.
        """
        self.preprocess_data = PreprocessData()
        self.plotting = Plotting()
        self.config = Config()

        # Log returns are switched on before the data is loaded.
        self.preprocess_data.enable_log_returns = True
        (self.sets_training,
         self.sets_test,
         self.sets_training_scaled,
         self.sets_test_scaled,
         self.training_dataset_names,
         self.test_dataset_names,
         self.maturities) = self.preprocess_data.get_data()

        self.wti_nymex = self.sets_test[0]
        # The index labels are materialised here but not used afterwards.
        _index_labels = self.wti_nymex.axes[0].tolist()

        # First column of the raw set and first row of its scaled counterpart.
        self.wti_nymex_short_end = self.wti_nymex.iloc[:, 0]
        self.data_scaled = self.sets_test_scaled[0][0]

        self.train_len = 128
        self.test_len = 42
        split_end = self.train_len + self.test_len
        self.data_train = self.wti_nymex[:self.train_len]
        self.data_test = self.wti_nymex[self.train_len:split_end]
        self.data_train_and_test = self.wti_nymex[:split_end]

        print("self.data_train.shape", self.data_train.shape)
        print("self.data_test.shape", self.data_test.shape)
예제 #6
0
    def __init__(self, params, plot=False):
        """Store the settings and build the model.

        Args:
            params: model parameters; kept on the instance and forwarded to
                ``build_model``.
            plot: stored as ``self.plot``.
        """
        self.config = Config()
        self.plotting = Plotting()

        self.params, self.plot = params, plot
        self.build_model(params)
    def __init__(self):
        """Prepare the model inputs and score 100 simulations with SMAPE.

        The first ``num_c`` rows of the last test set become the input rates;
        the following ``num_o + 1`` rows are held out. ``self.simulate`` is
        called 100 times, each result is compared with the held-out rows via
        ``smape``, and the mean and standard deviation of the scores are
        printed at the end.
        """
        print("Andersen Markov Model")

        self.plotting = Plotting()
        preprocess_logreturns = PreprocessData()
        preprocess_logreturns.enable_log_returns = True

        # 1. load the data; only the test sets and the maturities are used
        (_, sets_test, _, _, _, _, maturities) = preprocess_logreturns.get_data()

        # tenors: rate tenors in year fractions (from 0.083 to 5 over 60 steps)
        # rates: corresponding zero rates matrix
        # obs_time: observation dates in year fractions (starting at the first
        #           date); 988 steps from -3.835... to 0 on the WTI NYMEX data

        num_c = 6 * 7  # add '* 20' to see if a larger training set helps
        num_o = 6 * 7

        last_test_set = sets_test[-1]
        train_set = last_test_set.iloc[:num_c]
        test_set = last_test_set.iloc[num_c:num_c + num_o + 1]
        num_of_test_curves = len(test_set)

        self.test_set = test_set

        # Stored as a column vector.
        self.tenors = maturities[:, np.newaxis]
        self.rates = np.array(train_set)

        train_index = pd.Series(train_set.index)
        end_num = toYearFraction(last_test_set.index[-1])
        dates_as_decimal = np.array(
            train_index.apply(lambda stamp: toYearFraction(stamp, end_num)))
        self.dates_as_decimal = dates_as_decimal[:, np.newaxis]
        print("test_set.shape", np.array(test_set).shape)

        scores = []
        for run in range(100):
            simulated_rates = self.simulate(num_of_test_curves)

            score = smape(simulated_rates, test_set)
            scores.append(score)

            print("simulate rates", run)
            print("simulated, real",
                  np.array(simulated_rates).shape,
                  np.array(test_set).shape)
            print("smape:", score)
            print("=============\n")

        scores = np.array(scores)
        print("smape mean and std:", np.mean(scores), np.std(scores))
    def __init__(self, params, plot=True):
        """Initialise an unfitted PCA model.

        Args:
            params: mapping that provides ``'latent_dim'``, stored as
                ``self.k``.
            plot: stored as ``self.plot``.
        """
        self.k = params['latent_dim']
        # Both are unset until assigned elsewhere.
        self.A_tilde = self.mu = None
        self.plot = plot

        self.config = Config()
        self.plotting = Plotting()

        print("PCA")
    def __init__(self, params, plot=True):
        """Build and compile the discriminator and the adversarial autoencoder.

        Args:
            params: mapping with ``'input_dim'``, ``'latent_dim'``,
                ``'loss_generator'`` and ``'loss_discriminator'``; it is also
                forwarded to the ``build_*`` methods.
            plot: stored as ``self.plot``.
        """
        self.config = Config()
        self.plotting = Plotting()
        self.params, self.plot = params, plot

        self.input_dim = params['input_dim']
        self.latent_dim = params['latent_dim']

        learning_rate, beta_1 = 0.0002, 0.5
        optimizer = Adam(learning_rate, beta_1)

        # The discriminator is compiled on its own before it is frozen below.
        self.discriminator = self.build_discriminator(params)
        self.discriminator.compile(loss=params['loss_discriminator'],
                                   optimizer=optimizer,
                                   metrics=['accuracy'])

        self.encoder = self.build_encoder(params)
        self.decoder = self.build_decoder(params)

        # Encoder and decoder chained: input -> encoding -> reconstruction.
        model_input = Input(shape=(self.input_dim, ))
        encoding = self.encoder(model_input)
        reconstruction = self.decoder(encoding)

        # Within the stacked model only the encoder/decoder are to be trained.
        self.discriminator.trainable = False

        # The discriminator judges the encoding, not the reconstruction.
        validity = self.discriminator(encoding)

        # Stacked model with two outputs; the reconstruction loss carries
        # almost all of the weight.
        self.adversarial_autoencoder = Model(model_input,
                                             [reconstruction, validity])
        stacked_losses = [params['loss_generator'], params['loss_discriminator']]
        self.adversarial_autoencoder.compile(loss=stacked_losses,
                                             loss_weights=[0.999, 0.001],
                                             optimizer=optimizer)
    def __init__(self, params, plot=True):
        """Build the discriminator, the generator and the stacked model.

        Args:
            params: mapping with ``"num_c"``, ``"num_z"``, ``"num_o"``,
                ``"num_tenors"`` and ``"loss"``.
            plot: stored as ``self.plot``.
        """
        self.config = Config()
        self.plotting = Plotting()

        self.params, self.plot = params, plot

        # Number of conditioning, random and predicted returns, and the
        # number of tenors per row.
        self.num_c = params["num_c"]
        self.num_z = params["num_z"]
        self.num_o = params["num_o"]
        self.num_tenors = params["num_tenors"]

        optimizer = Adam(1e-5)

        # The discriminator is compiled on its own before it is frozen below.
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss=params["loss"],
                                   optimizer=optimizer,
                                   metrics=['accuracy'])

        self.generator = self.build_generator()

        # Symbolic inputs of the stacked model: a condition and a noise block.
        condition_in = Input(shape=(self.num_c, self.num_tenors))
        noise_in = Input(shape=(self.num_z, self.num_tenors))
        generated = self.generator([condition_in, noise_in])

        # Within the stacked model only the generator is to be trained.
        self.discriminator.trainable = False

        # The discriminator scores the generator output.
        validity = self.discriminator(generated)

        # Stacked generator + discriminator: trains the generator to fool
        # the discriminator.
        self.combined = Model([condition_in, noise_in], validity)
        self.combined.compile(loss=params["loss"], optimizer=optimizer)
예제 #11
0
    def simulate(self):
        """Plot the model's current rates, regenerate them and plot the result."""
        plotting = Plotting()
        old_rates = self.model.rates

        plotting.plot_3d("AMModel_input_data", old_rates)
        # NOTE(review): the plot is named "..._first" but the last row is
        # plotted -- confirm which one is intended.
        plotting.plot_2d(old_rates[-1, :], "AMModel_input_data_first")

        tenors = self.model.tenors
        obs_time = self.model.obs_time

        for label, value in (("tenors", tenors),
                             ("obs_time", obs_time),
                             ("old_rates", old_rates)):
            print(label, value)

        # make_data() is expected to replace self.model.rates, which is read
        # again right after the call.
        self.model.make_data()
        new_rates = self.model.rates

        print("new rates", new_rates)

        plotting.plot_3d("AMModel_test", new_rates)
        print("made data")
class Analysis():
    """Exploratory plots of the datasets under different preprocessing setups.

    Each public method loads the data through its own ``PreprocessData``
    instance and plots or prints a view of it.
    """

    # Curve indices drawn by the raw-versus-scaled comparison plots
    # (an earlier selection was 25, 50, 75, 100, 600, 720, 740, 815).
    _CURVE_INDICES = (25, 50, 75, 815)

    def __init__(self):
        """Load the log-return datasets and keep views of the first test set."""
        self.preprocess_data = PreprocessData()
        self.plotting = Plotting()
        self.config = Config()

        # Log returns are switched on before the data is loaded.
        self.preprocess_data.enable_log_returns = True
        (self.sets_training,
         self.sets_test,
         self.sets_training_scaled,
         self.sets_test_scaled,
         self.training_dataset_names,
         self.test_dataset_names,
         self.maturities) = self.preprocess_data.get_data()

        # presumably the WTI NYMEX dataset, going by the attribute name below
        wti_nymex = self.sets_test[0]

        # First column of the raw set and first row of its scaled counterpart.
        self.wti_nymex_short_end = wti_nymex.iloc[:, 0]
        self.data_scaled = self.sets_test_scaled[0][0]

    def _plot_first_test_set(self, name, sets_test, sets_test_scaled,
                             maturities):
        """Plot selected curves of the first test set, raw versus scaled."""
        self.plotting.plot_some_curves(name,
                                       sets_test[0],
                                       sets_test_scaled[0],
                                       list(self._CURVE_INDICES),
                                       maturities,
                                       plot_separate=True)

    def _save_time_series(self, datasets, names):
        """Plot each dataset as a saved 2-D time series named after it."""
        for i, dataset in enumerate(datasets):
            self.plotting.plot_2d(dataset,
                                  "/time_series/" + names[i],
                                  timeseries=True,
                                  save=True,
                                  title=True)

    @staticmethod
    def _print_range(dataset):
        """Print first/last index label and the rounded overall min and max."""
        print(dataset.index[0], dataset.index[-1],
              round(np.min(dataset.min()), 2),
              round(np.max(dataset.max()), 2))

    def normalisation_over_tenors(self):
        """Compare raw and scaled curves for NORMALISATION_OVER_TENORS."""
        preprocess = PreprocessData(PreprocessType.NORMALISATION_OVER_TENORS)
        (_, sets_test, _, sets_test_scaled, _, _,
         maturities) = preprocess.get_data()

        print("sets_test[0].shape", sets_test[0].shape,
              sets_test_scaled[0].shape)

        self._plot_first_test_set("normalisation_over_tenors", sets_test,
                                  sets_test_scaled, maturities)

    def standardisation_over_tenors(self):
        """Compare raw and scaled curves for STANDARDISATION_OVER_TENORS."""
        preprocess = PreprocessData(PreprocessType.STANDARDISATION_OVER_TENORS)
        (_, sets_test, _, sets_test_scaled, _, _,
         maturities) = preprocess.get_data()

        self._plot_first_test_set("standardisation_over_tenors", sets_test,
                                  sets_test_scaled, maturities)

    def logreturns_over_tenors(self):
        """Compare raw and scaled curves for LOG_RETURNS_OVER_TENORS.

        The scaled first test set is additionally drawn as a 3-D plot. The
        plot names end in ``_over_curves``; they are kept unchanged so that
        existing output names stay the same.
        """
        preprocess = PreprocessData(PreprocessType.LOG_RETURNS_OVER_TENORS)
        (_, sets_test, _, sets_test_scaled, _, _,
         maturities) = preprocess.get_data()

        self._plot_first_test_set("logreturns_over_curves", sets_test,
                                  sets_test_scaled, maturities)

        self.plotting.plot_3d(
            "logreturns_over_curves_3d",
            sets_test_scaled[0],
        )

    def normalisation_over_curves(self):
        """Compare raw and scaled curves with the normalisation scaler.

        The scaler is configured with ``enable_ignore_price`` and a feature
        range of ``[0, 1]``.
        """
        preprocess = PreprocessData()
        preprocess.enable_normalisation_scaler = True
        preprocess.enable_ignore_price = True
        preprocess.feature_range = [0, 1]
        (_, sets_test, _, sets_test_scaled, _, _,
         maturities) = preprocess.get_data()

        self._plot_first_test_set("normalisation_over_curves", sets_test,
                                  sets_test_scaled, maturities)

    def standardisation_over_curves(self):
        """Placeholder: only prints a todo message."""
        print("todo standardisation_over_curves")

    def logreturns_over_curves(self):
        """Placeholder: only prints a todo message."""
        print("todo logreturns_over_curves")

    def all_log_returns(self):
        """Plot every scaled training set (log returns) without saving."""
        preprocess_data = PreprocessData()
        # This method plots through its own Plotting instance.
        plotting = Plotting()

        preprocess_data.enable_log_returns = True
        (_, _, sets_training_scaled, _, training_dataset_names, _,
         _) = preprocess_data.get_data()

        for i, set_training_scaled in enumerate(sets_training_scaled):
            print("set_training_scaled.shape", set_training_scaled.shape, i)
            plotting.plot_2d(set_training_scaled,
                             "/time_series/" + training_dataset_names[i],
                             timeseries=True,
                             save=False,
                             title=True)

    def all_normalised_data(self):
        """Plot and save every scaled training and test set (normalised)."""
        preprocess_data = PreprocessData()

        preprocess_data.enable_normalisation_scaler = True
        (_, _, sets_training_scaled, sets_test_scaled, training_dataset_names,
         test_dataset_names, _) = preprocess_data.get_data()

        self._save_time_series(sets_training_scaled, training_dataset_names)
        self._save_time_series(sets_test_scaled, test_dataset_names)

    def all_data(self, show_title=False):
        """Print the ranges of every raw dataset and plot the test sets.

        Args:
            show_title: forwarded to the plotting calls.
        """
        preprocess_data = PreprocessData(extend_data=False)
        (sets_training, sets_test, _, _, training_dataset_names,
         test_dataset_names, maturities) = preprocess_data.get_data()

        print("maturities", maturities)

        for i, set_training in enumerate(sets_training):
            # Label each set with the name returned together with it. The
            # names cached in __init__ come from a differently configured
            # PreprocessData and need not line up with these sets.
            print(training_dataset_names[i])
            self._print_range(set_training)

            # The covariance is computed, but its plot is currently disabled.
            cov_log_returns_over_tenors(set_training)

            print("\n")

        for i, set_test in enumerate(sets_test):
            print(test_dataset_names[i])
            self._print_range(set_test)
            self.plotting.plot_2d(set_test,
                                  "/time_series/" + test_dataset_names[i],
                                  timeseries=True,
                                  save=True,
                                  title=show_title)
            self.plotting.plot_3d("/time_series/" + test_dataset_names[i] +
                                  "_3d",
                                  set_test,
                                  show_title=show_title)

            # The covariance is computed, but its plot is currently disabled.
            cov_log_returns_over_tenors(set_test)

            print("\n")
class GAN:
    """Conditional GAN that extends a window of rows by a generated segment.

    The generator takes ``num_c`` conditioning rows and ``num_z`` noise rows
    (each ``num_tenors`` wide) and returns the conditioning rows followed by
    ``num_o`` generated rows. The discriminator scores windows of
    ``num_c + num_o`` rows.
    """

    def __init__(self, params, plot=True):
        """Build the discriminator, the generator and the stacked model.

        Args:
            params: mapping with ``"num_c"``, ``"num_z"``, ``"num_o"``,
                ``"num_tenors"`` and ``"loss"``; the ``build_*`` methods and
                ``train`` read further keys from it.
            plot: when truthy, ``train`` plots intermediate simulations and
                the loss curves.
        """
        self.config = Config()
        self.plotting = Plotting()

        self.params = params
        self.plot = plot

        # Number of conditioning, random and predicted returns
        self.num_c = params["num_c"]
        self.num_z = params["num_z"]
        self.num_o = params["num_o"]
        self.num_tenors = params["num_tenors"]

        optimizer = Adam(1e-5)

        # Build and compile the discriminator before it is frozen below
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss=params["loss"],
                                   optimizer=optimizer,
                                   metrics=['accuracy'])

        # Build the generator
        self.generator = self.build_generator()

        # The generator takes a condition and noise and generates a window
        condition = Input(shape=(self.num_c, self.num_tenors))
        noise = Input(shape=(self.num_z, self.num_tenors))
        img = self.generator([condition, noise])

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # The discriminator takes generated windows and determines validity
        validity = self.discriminator(img)

        # The combined model (stacked generator and discriminator)
        # trains the generator to fool the discriminator
        self.combined = Model([condition, noise], validity)
        self.combined.compile(loss=params["loss"], optimizer=optimizer)

    def build_generator(self):
        """Return the generator model.

        The inner network is selected by ``params['gen_model_type']``
        (``'standard'`` or ``'conv'``). The returned model maps
        ``[condition, noise]`` to the condition concatenated with the
        generated rows along axis 1.
        """
        model = Sequential()
        stacked_shape = (self.num_c + self.num_z, self.num_tenors)

        if self.params['gen_model_type'] == 'standard':
            model.add(Flatten(input_shape=stacked_shape))

            for units in self.params['gen_layers']:
                model.add(Dense(units, activation='relu'))

        elif self.params['gen_model_type'] == 'conv':
            model.add(
                Conv1D(28,
                       kernel_size=5,
                       padding="same",
                       data_format="channels_last",
                       activation='relu',
                       input_shape=stacked_shape)
            )  # for temporal data we should use padding valid
            model.add(
                Conv1D(2,
                       kernel_size=3,
                       padding="same",
                       data_format="channels_last",
                       activation='relu',
                       input_shape=stacked_shape))
            model.add(MaxPooling1D(pool_size=2))
            model.add(Flatten())

        # final layers: one unit per output value, reshaped to rows x tenors
        model.add(
            Dense(np.prod((self.num_o, self.num_tenors)),
                  activation=self.params['gen_last_activation']))
        model.add(Reshape((self.num_o, self.num_tenors)))

        print("-" * 20 + "\ngan generator")
        model.summary()

        condition = Input(shape=(self.num_c, self.num_tenors))
        z = Input(shape=(self.num_z, self.num_tenors))
        model_input = concatenate([condition, z], axis=1)

        out = model(model_input)

        return Model([condition, z], concatenate([condition, out], axis=1))

    def build_discriminator(self):
        """Return the discriminator model.

        The inner network is selected by ``params['dis_model_type']``
        (``'standard'`` or ``'conv'``) and ends in a single unit with the
        ``params['dis_last_activation']`` activation.
        """
        model = Sequential()
        # Windows handed to the discriminator consist of the condition
        # followed by the generated (or real) rows.
        window_shape = (self.num_c + self.num_o, self.num_tenors)

        if self.params['dis_model_type'] == 'standard':
            model.add(Flatten(input_shape=window_shape))

            for units in self.params['dis_layers']:
                model.add(Dense(units, activation='relu'))

        elif self.params['dis_model_type'] == 'conv':
            # The declared input shape must match the model input created
            # below (num_c + num_o rows); it previously used num_z, which
            # only agrees when num_z == num_o.
            model.add(
                Conv1D(32,
                       kernel_size=4,
                       strides=1,
                       padding='same',
                       activation='relu',
                       input_shape=window_shape))
            model.add(MaxPooling1D(pool_size=2))
            model.add(Flatten())

        # final layer
        model.add(Dense(1, activation=self.params['dis_last_activation']))

        print("-" * 20 + "\ngan discriminator")
        model.summary()

        model_input = Input(shape=window_shape)
        validity = model(model_input)

        return Model(model_input, validity)

    def train(self,
              data_train,
              name=None,
              sample_interval=200,
              epochs=None,
              batch_size=None):
        """Alternate discriminator and generator updates.

        Args:
            data_train: a 2-D dataset or a list of datasets to sample from.
            name: when given, the models are saved under this name at the end.
            sample_interval: every this many iterations progress is printed
                and recorded, NaN losses stop the training and, when
                ``self.plot`` is truthy, a simulation is plotted.
            epochs: number of iterations; ``params['epochs']`` when None.
            batch_size: batch size; ``params['batch_size']`` when None.
        """
        if epochs is None:
            epochs = self.params['epochs']
        if batch_size is None:
            batch_size = self.params['batch_size']

        discriminator_loss = []
        discriminator_acc = []
        generator_loss = []

        for epoch in range(epochs):

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Real windows; twice as many as the generated ones below
            real = self.collect_samples(data_train, 2 * batch_size,
                                        self.num_c + self.num_o)
            real_labels = np.ones((2 * batch_size, 1))

            d_loss_real = self.discriminator.train_on_batch(real, real_labels)

            # Generated windows, conditioned on real data
            condition = self.collect_samples(data_train, batch_size,
                                             self.num_c)
            noise = np.random.normal(size=(batch_size, self.num_z,
                                           self.num_tenors))
            gen_imgs = self.generator.predict([condition, noise])
            fake_labels = np.zeros((batch_size, 1))

            d_loss_fake = self.discriminator.train_on_batch(
                gen_imgs, fake_labels)

            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------

            condition = self.collect_samples(data_train, batch_size,
                                             self.num_c)
            noise = np.random.normal(size=(batch_size, self.num_z,
                                           self.num_tenors))
            real_labels = np.ones((batch_size, 1))

            # Train the generator (to have the discriminator label its
            # output as valid)
            g_loss = self.combined.train_on_batch([condition, noise],
                                                  real_labels)

            if epoch % sample_interval == 0:
                # record progress
                print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" %
                      (epoch, d_loss[0], 100 * d_loss[1], g_loss))
                discriminator_loss.append(d_loss[0])
                discriminator_acc.append(d_loss[1])
                generator_loss.append(g_loss)

                if np.isnan(d_loss[0]) or np.isnan(g_loss):
                    # the losses diverged; further training is pointless
                    break

                # plot simulation
                if self.plot:
                    generated, real_ = self.generate(condition=data_train,
                                                     num_simulations=1)
                    self.plotting.plot_3d_training(
                        "gan_3d_simple_training/" + "%d" % epoch, generated,
                        real_)

        if self.plot:
            self.plotting.plot_losses(discriminator_loss,
                                      discriminator_acc,
                                      generator_loss,
                                      "gan 3d simple training",
                                      legend=[
                                          'discriminator loss',
                                          'discriminator acc', 'generator loss'
                                      ])

        if name is not None:
            self.save_model(name)

    def generate(self,
                 condition=None,
                 condition_on_end=True,
                 num_simulations=1,
                 remove_condition=True,
                 repeat=None):
        """Generate segments conditioned on ``condition``.

        Args:
            condition: a 2-D array or DataFrame (one dataset), a 3-D array
                (a batch of windows) or a list whose first element is such a
                dataset. Required despite the ``None`` default.
            condition_on_end: take the last ``num_c`` rows as the condition
                when True, otherwise the first ``num_c`` rows.
            num_simulations: when greater than 1, every condition is
                repeated this many times.
            remove_condition: drop the leading ``num_c`` rows from the
                generator output.
            repeat: when a positive int, ``repeat - 1`` further segments are
                generated, each conditioned on the output so far, and
                appended along axis 1.

        Returns:
            Tuple ``(generated, condition_used)``; ``condition_used`` is the
            batch of conditions that was fed to the generator.

        Raises:
            ValueError: if ``condition`` is None.
        """
        if condition is None:
            raise ValueError("generate() requires a condition")

        if isinstance(condition, list):
            # A list of datasets: condition on the first one. (The list used
            # to be copied as-is, which failed as soon as `.shape` was read.)
            _condition = np.array(condition[0])
        elif isinstance(condition, pd.DataFrame):
            _condition = np.array(condition)
        else:
            _condition = condition.copy()

        print("_condition", _condition.shape)

        rows = (slice(-self.num_c, None)
                if condition_on_end else slice(None, self.num_c))
        if len(_condition.shape) == 2:
            # a single dataset: add the batch axis
            _condition = _condition[np.newaxis, rows]
        else:
            _condition = _condition[:, rows]

        print("_condition after", _condition.shape)

        if num_simulations > 1:
            _condition = np.repeat(_condition, num_simulations, axis=0)

        # One noise block per condition row, so both generator inputs always
        # have the same batch size -- also when a batch of conditions is
        # combined with num_simulations > 1.
        _num_simulations = _condition.shape[0]

        noise = np.random.normal(size=(_num_simulations, self.num_z,
                                       self.num_tenors))
        generated = self.generator.predict([_condition, noise])

        if remove_condition:
            generated = generated[:, self.num_c:, :]

        if isinstance(repeat, int) and repeat > 0:
            for _ in range(repeat - 1):
                generated_temp, _unused = self.generate(condition=generated,
                                                        remove_condition=True)
                generated = np.append(generated, generated_temp, axis=1)

        return generated, _condition

    def collect_samples(self,
                        data,
                        batch_size,
                        pattern_len,
                        ret_indices=False,
                        indices=None):
        """Cut windows of ``pattern_len`` consecutive rows out of ``data``.

        Args:
            data: a 2-D dataset, or a list of datasets from which one is
                picked at random.
            batch_size: number of random windows; ignored when ``indices``
                is given.
            pattern_len: number of rows per window.
            ret_indices: also return the start indices.
            indices: explicit start indices to use instead of random ones.

        Returns:
            Array of windows, or ``(windows, indices)`` when ``ret_indices``
            is true.
        """
        if isinstance(data, list):
            _data = np.array(data[np.random.randint(len(data))])
        else:
            _data = np.array(data)

        if indices is None:
            # number of valid start positions
            n = _data.shape[0] - pattern_len + 1
            indices = np.random.randint(n, size=batch_size)

        samples = np.array([_data[a:a + pattern_len, :] for a in indices])
        if ret_indices:
            return samples, indices
        return samples

    def save_model(self, name):
        """Save generator, discriminator and combined model under ``name``."""
        self.generator.save(
            self.config.get_filepath_gan_model(name + "_3d_simple_generator"))
        self.discriminator.save(
            self.config.get_filepath_gan_model(name +
                                               "_3d_simple_discriminator"))
        self.combined.save(
            self.config.get_filepath_gan_model(name + "_3d_simple_combined"))

    def load_model(self, name):
        """Load the three models saved under ``name``.

        Returns:
            True when all three files exist and were loaded; otherwise the
            existence flags and paths are printed and False is returned.
        """
        generator_filepath = self.config.get_filepath_gan_model(
            name + "_3d_simple_generator")
        discriminator_filepath = self.config.get_filepath_gan_model(
            name + "_3d_simple_discriminator")
        combined_filepath = self.config.get_filepath_gan_model(
            name + "_3d_simple_combined")

        if self.config.file_exists(
                generator_filepath) and self.config.file_exists(
                    discriminator_filepath) and self.config.file_exists(
                        combined_filepath):
            self.generator = load_model(generator_filepath)
            self.discriminator = load_model(discriminator_filepath)
            self.combined = load_model(combined_filepath)
            return True

        print("trained model does not exist yet!")
        print(self.config.file_exists(generator_filepath),
              self.config.file_exists(discriminator_filepath),
              self.config.file_exists(combined_filepath))
        print(generator_filepath, discriminator_filepath, combined_filepath)
        return False

    def load_else_train(self, x_train, name):
        """Load the models saved under ``name``; train and save them if absent."""
        did_load = self.load_model(name)
        if not did_load:
            self.train(x_train)
            self.save_model(name)
def simulate():
    """Train a variational autoencoder on normalised data and plot results.

    The data is scaled with the normalisation scaler to the range [-1, 1].
    A ``VariationalAutoencoder`` with a 3-dimensional latent space is trained
    on the stacked training sets and saved under a name derived from the MD5
    hash of its parameters. The first test set is then encoded, decoded and
    rescaled, the reconstruction error is evaluated, and the latent space and
    reconstructed curves are plotted.
    """
    plotting = Plotting()
    preprocess_normalisation = PreprocessData()
    preprocess_normalisation.enable_normalisation_scaler = True
    preprocess_normalisation.feature_range = [-1, 1]
    # preprocess_normalisation.enable_ignore_price = True

    # 1. get data and apply normalisation
    (sets_training, sets_test, sets_training_scaled, sets_test_scaled,
     training_dataset_names, test_dataset_names,
     maturities) = preprocess_normalisation.get_data()
    all_training_scaled = np.vstack(sets_training_scaled)

    ae_params = {
        'input_dim': sets_training_scaled[0].shape[1],  # 56
        'latent_dim': 3,
        'hidden_layers': (56, 40, 28, 12, 4),
        'leaky_relu': 0.1,
        'last_activation': 'linear',  # sigmoid or linear
        'loss': 'mean_square_error',  # binary_crossentropy or mean_square_error
        'epsilon_std': 1.0,
        'batch_size': 20,
        'epochs': 100,
        'steps_per_epoch': 500
    }
    # the hash of the sorted parameters identifies the stored model
    serialised_params = json.dumps(ae_params, sort_keys=True).encode('utf-8')
    ae_params_hash = hashlib.md5(serialised_params).hexdigest()

    # 2. train and save the variational autoencoder
    vae = VariationalAutoencoder(ae_params)
    vae.train(all_training_scaled, sets_test_scaled)
    vae.save_model("vae_" + ae_params_hash)
    # vae.load_model("vae_" + ae_params_hash)

    # 3. encode every training and test set
    sets_encoded_training = [
        vae.encode(scaled_set) for scaled_set in sets_training_scaled
    ]
    sets_encoded_test = [
        vae.encode(scaled_set) for scaled_set in sets_test_scaled
    ]

    # 4. decode the first encoded test set
    first_test_encoded = sets_encoded_test[0]
    decoded_data = vae.decode(first_test_encoded)

    # 5. undo the scaling, for now only for the first test set
    simulated = preprocess_normalisation.rescale_data(
        decoded_data, dataset_name=test_dataset_names[0])

    # reconstruction error on the unscaled data
    # reconstruction_error(sets_test_scaled[0], decoded_data)
    reconstruction_error(np.array(sets_test[0]), simulated)

    # plot latent space
    plotting.plot_2d(first_test_encoded,
                     "test_feature_normalised_encoded_vae_on_",
                     save=True)
    plotting.plot_space(maturities,
                        vae,
                        "variational_grid",
                        latent_dim=first_test_encoded.shape[1])

    # curves (by index) that are drawn in the comparison plots
    curve_indices = (25, 50, 75, 815)

    # scaled input against the raw decoder output
    plotting.plot_some_curves("test_feature_normalised_compare_vae_scaled",
                              sets_test_scaled[0], decoded_data,
                              list(curve_indices), maturities)

    # unscaled input against the rescaled reconstruction
    plotting.plot_some_curves("test_feature_normalised_compare_vae",
                              sets_test[0], simulated, list(curve_indices),
                              maturities)
예제 #15
0
def simulate(plot=True):
    """Train a windowed autoencoder and report SMAPE on the first test set.

    The data is normalised to [0, 1] and requested from ``get_data`` in
    chunks of ``window_size`` (20). An ``AutoencoderWindows`` model is trained
    and saved, the first test set is encoded, decoded and rescaled with curve
    smoothing enabled, and the mean and variance of the SMAPE are printed.

    Args:
        plot: when true, a comparison of original and reconstructed curves
            is plotted.
    """
    plotting = Plotting()
    preprocess = PreprocessData()
    preprocess.enable_normalisation_scaler = True
    preprocess.feature_range = [0, 1]

    window_size = 20

    # 1. get data and apply normalisation
    (sets_training, sets_test, sets_training_scaled, sets_test_scaled,
     training_dataset_names, test_dataset_names,
     maturities) = preprocess.get_data(chunks_of=window_size)

    print("sets_training_scaled.shape", sets_training_scaled[0].shape)

    ae_params = {
        # (window_size, number of columns of the scaled data)
        'input_dim': (window_size, sets_training_scaled[0].shape[1]),
        'latent_dim': (2, 56),
        'hidden_layers': (12 * 56, 4 * 56),
        'leaky_relu': 0.1,
        'loss': 'mse',
        'last_activation': 'linear',
        'batch_size': 20,
        'epochs': 100,
        'steps_per_epoch': 500,
    }
    # the hash of the sorted parameters identifies the stored model
    serialised_params = json.dumps(ae_params, sort_keys=True).encode('utf-8')
    ae_params_hash = hashlib.md5(serialised_params).hexdigest()

    autoencoder = AutoencoderWindows(ae_params)

    print("sets_training_scaled", sets_training_scaled[0].shape)

    autoencoder.train(sets_training_scaled, sets_test_scaled)
    autoencoder.save_model("ae_" + ae_params_hash)
    # autoencoder.load_model("ae_" + ae_params_hash)

    # 2. encode every training and test set
    sets_encoded_training = [
        autoencoder.encode(scaled_set) for scaled_set in sets_training_scaled
    ]
    sets_encoded_test = [
        autoencoder.encode(scaled_set) for scaled_set in sets_test_scaled
    ]

    print("sets_encoded_training", len(sets_encoded_training),
          sets_encoded_training[0].shape)
    print("sets_encoded_test", sets_encoded_test[0].shape)

    # 3. decode the first encoded test set
    decoded_test = autoencoder.decode(sets_encoded_test[0])

    print("decoded_test", decoded_test.shape)

    # 4. undo the scaling with curve smoothing switched on
    preprocess.enable_curve_smoothing = True
    simulated_smooth = preprocess.rescale_data(
        decoded_test, dataset_name=test_dataset_names[0])

    # 5. SMAPE between reconstruction and original test data
    smape_result_smooth = smape(simulated_smooth,
                                np.array(sets_test[0]),
                                over_curves=True)

    print(np.mean(smape_result_smooth), np.var(smape_result_smooth))

    if not plot:
        return

    plotting.plot_some_curves("normalised_compare_ae", sets_test[0],
                              simulated_smooth, [25, 50, 75, 815],
                              maturities)
예제 #16
0
def simulate():
    """Score encoded test data with a stored GAN discriminator.

    Min-max scaled data is encoded with a ``DeepAutoencoder`` loaded from
    "deep_general_minimax", the encodings are passed through a second
    ``PreprocessData`` instance with log returns enabled, and a ``GAN``
    loaded from "general_ae" is asked for discriminator predictions on a
    constant test array and on rolling windows of the first test set. The
    predictions are printed. The return values of both ``load_model`` calls
    are ignored.
    """
    plotting = Plotting()
    preprocess_minmax = PreprocessData()
    preprocess_logreturns = PreprocessData()
    preprocess_minmax.enable_min_max_scaler = True
    preprocess_logreturns.enable_log_returns = True

    # 1. get data and apply min-max scaling
    (sets_training, sets_test, sets_training_scaled, sets_test_scaled,
     training_dataset_names, test_dataset_names,
     maturities) = preprocess_minmax.get_data()

    print("sets_training_scaled.shape", sets_training_scaled[0].shape)

    autoencoder = DeepAutoencoder(
        input_shape=(sets_training_scaled[0].shape[1], ), latent_dim=2)
    # autoencoder.train(np.vstack(sets_training_scaled), sets_test_scaled, epochs=100, batch_size=5)
    # autoencoder.save_model("deep_general_minimax")
    autoencoder.load_model("deep_general_minimax")

    # 2. encode every training and test set
    sets_encoded_training = [
        autoencoder.encode(scaled_set) for scaled_set in sets_training_scaled
    ]
    sets_encoded_test = [
        autoencoder.encode(scaled_set) for scaled_set in sets_test_scaled
    ]

    plotting.plot_2d(sets_encoded_test[0],
                     "encoded test data with deep autoencoder",
                     save=False)

    # 3. log returns of the encoded data
    sets_encoded_log_training = [
        preprocess_logreturns.scale_data(encoded_set)
        for encoded_set in sets_encoded_training
    ]
    sets_encoded_log_test = [
        preprocess_logreturns.scale_data(encoded_set)
        for encoded_set in sets_encoded_test
    ]

    plotting.plot_2d(
        sets_encoded_log_test[0],
        "encoded test data with deep autoencoder, then log returns",
        save=False)

    # 4. load the GAN
    num_tenors = sets_encoded_log_training[0].shape[1]
    segment_length = 6 * 7
    gan = GAN(num_c=segment_length,
              num_z=segment_length,
              num_o=segment_length,
              num_tenors=num_tenors)  # try training on larger input and output
    # gan.train(sets_encoded_log_training, epochs=20000, batch_size=100, sample_interval=200)
    # gan.save_model("general_ae")
    gan.load_model("general_ae")

    print("sets_encoded_log_test[0].shape", sets_encoded_log_test[0].shape)

    # length of the sequences handed to the discriminator
    window_length = segment_length + segment_length

    # 5. discriminator output for a constant array of tens
    test_arr = np.full([1, window_length, num_tenors], 10)
    validity = gan.discriminator.predict(test_arr)
    print(validity)

    # 6. discriminator output for rolling windows over the first test set
    rolled_encoded_log_test = rolling_windows(sets_encoded_log_test[0],
                                              window_length)
    validity = gan.discriminator.predict(rolled_encoded_log_test)
    print(validity)
def simulate():
    """Evaluate how an autoencoder reconstructs hand-made unrealistic curves.

    An ``Autoencoder`` is loaded (or trained) on data scaled with
    ``PreprocessType.STANDARDISATION_OVER_TENORS``. The SMAPE between the
    rescaled reconstruction and the original data is first computed and
    plotted for the first test set. Afterwards a fixed collection of
    synthetic curves (constant levels, two-level steps, uniform noise and
    linear ramps) is scaled, encoded, decoded and rescaled, and the SMAPE per
    curve is printed and plotted.

    The synthetic curves are as wide as the scaled training data, i.e. as
    wide as the autoencoder input.
    """
    plotting = Plotting()
    preprocess_type = PreprocessType.STANDARDISATION_OVER_TENORS
    preprocess = PreprocessData(preprocess_type)

    # 1. get data and apply the scaling selected by preprocess_type
    (sets_training, sets_test, sets_training_scaled, sets_test_scaled,
     training_dataset_names, test_dataset_names,
     maturities) = preprocess.get_data()
    all_training_scaled = np.vstack(sets_training_scaled)
    num_tenors = sets_training_scaled[0].shape[1]  # 56

    ae_params = {
        'preprocess_type':
        preprocess_type.value,  # only to make preprocess_type part of the hash
        'input_dim': num_tenors,
        'latent_dim': 2,
        'hidden_layers': (
            56,
            40,
            28,
            12,
            4,
        ),
        'leaky_relu': 0.1,
        'loss': 'mse',
        'last_activation': 'linear',
        'batch_size': 20,
        'epochs': 100,
        'steps_per_epoch': 500
    }
    ae_params_hash = hashlib.md5(
        json.dumps(ae_params, sort_keys=True).encode('utf-8')).hexdigest()

    autoencoder = Autoencoder(ae_params)
    autoencoder.load_else_train(all_training_scaled, sets_test_scaled,
                                "ae_" + ae_params_hash)

    # 2. reconstruct the first test set and evaluate it
    encoded_test = autoencoder.encode(sets_test_scaled[0])
    decoded_test = autoencoder.decode(encoded_test)

    rescaled_test = preprocess.rescale_data(
        decoded_test, dataset_name=test_dataset_names[0])
    smape_test = smape(rescaled_test, np.array(sets_test[0]),
                       over_curves=True)

    print("smape_result test set", np.mean(smape_test), np.std(smape_test),
          np.min(smape_test), np.max(smape_test))

    plotting.plot_2d(sets_test[0],
                     "evaluation of test curves",
                     timeseries=True,
                     evaluation=smape_test,
                     title=False)

    # 3. see how well the autoencoder can map unrealistic curves
    # todo: generate random curves, THEN apply min-max feature scaling, THEN evaluate
    curve_shape = num_tenors  # must match the autoencoder input width
    first_half = curve_shape // 2
    second_half = curve_shape - first_half  # keeps odd widths complete

    flat_levels = (5, 10, 20, 50, 70, 100, 150, 200, 250, 300)
    step_levels = ((50, 150), (100, 150), (100, 200))
    uniform_ranges = ((0, 10), (10, 70), (0, 100), (100, 200), (200, 300),
                      (0, 200), (0, 250), (0, 300))
    ramps = ((0, 100), (50, 150), (100, 200), (150, 250), (200, 300),
             (0, 200), (0, 300), (100, 0), (150, 50), (200, 100),
             (250, 150), (300, 200), (200, 0), (300, 0))

    # constant curves
    unrealistic_curves = [
        np.full(curve_shape, level) for level in flat_levels
    ]
    # curves that jump from one level to another halfway
    unrealistic_curves += [
        np.hstack((np.full(first_half, low), np.full(second_half, high)))
        for low, high in step_levels
    ]
    # uniform noise; drawn in this order so seeded runs stay reproducible
    unrealistic_curves += [
        np.random.uniform(low, high, curve_shape)
        for low, high in uniform_ranges
    ]
    # rising and falling linear ramps
    unrealistic_curves += [
        np.linspace(start, stop, num=curve_shape) for start, stop in ramps
    ]
    unrealistic_curves = np.array(unrealistic_curves)
    print("unrealistic_curves.shape", unrealistic_curves.shape)

    # NOTE(review): should_fit=True presumably refits the scaler stored for
    # the first training dataset on these synthetic curves - confirm intended
    unrealistic_curves_scaled = preprocess.scale_data(
        unrealistic_curves,
        dataset_name=training_dataset_names[0],
        should_fit=True)

    encoded_unrealistic = autoencoder.encode(unrealistic_curves_scaled)
    decoded_unrealistic = autoencoder.decode(encoded_unrealistic)

    rescaled_unrealistic = preprocess.rescale_data(
        decoded_unrealistic, dataset_name=training_dataset_names[0])
    smape_result = smape(rescaled_unrealistic, unrealistic_curves,
                         over_curves=True)

    def round_to_n(value, n):
        """Round value to n significant digits.

        Zero and non-finite values are returned unchanged because their
        base-10 logarithm cannot be converted to a digit count.
        """
        if value == 0 or not np.isfinite(value):
            return value
        magnitude = int(np.floor(np.log10(abs(value))))
        return round(value, -magnitude + (n - 1))

    print("smape results", smape_result)
    for a_smape_result in smape_result:
        print(round_to_n(a_smape_result, 2))

    plotting.plot_2d(smape_result,
                     "loss of unrealistic curves from autoencoder SMAPE",
                     save=False,
                     title=True)
    # plotting.plot_2d(unrealistic_eval_mse, "loss of unrealistic curves from autoencoder MSE", save=False, title=True)
    plotting.plot_unrealisticness(
        unrealistic_curves,
        "loss of unrealistic curves from autoencoder",
        timeseries=True,
        evaluation=smape_result,
        title=False,
        eval_label="SMAPE")
def simulate(latent_dim=2,
             plot=False,
             preprocess_type=None,
             model_type=None,
             force_training=True):
    """Train or load one autoencoder variant and return its smoothed SMAPE.

    Args:
        latent_dim: latent dimension written into the model parameters (not
            used by the windowed autoencoder, whose latent shape is fixed).
        plot: when true, and the model is not ``AEModel.AE_WINDOWS``, the
            latent space and reconstructed curves are plotted.
        preprocess_type: passed to ``PreprocessData``; its ``value`` becomes
            part of the parameter hash for the AAE, VAE, AE and PCA models
            and its ``name`` is used in plot names.
        model_type: ``AEModel`` member selecting the model. Any value other
            than AAE, VAE, AE or PCA falls through to ``AutoencoderWindows``.
        force_training: when true ``train`` is called, otherwise
            ``load_else_train``.

    Returns:
        The result of ``smape`` (with ``over_curves=True``) between the
        smoothed, rescaled reconstruction of the first test set and the
        original first test set.
    """
    plotting = Plotting()
    preprocess = PreprocessData(preprocess_type)

    # only the windowed autoencoder asks get_data for chunked data
    window_size = 10 if model_type is AEModel.AE_WINDOWS else None

    # 1. get data and apply the scaling
    (sets_training, sets_test, sets_training_scaled, sets_test_scaled,
     training_dataset_names, test_dataset_names,
     maturities) = preprocess.get_data(chunks_of=window_size)
    all_training_scaled = np.vstack(sets_training_scaled)

    # layer sizes shared by the AAE, VAE and AE models
    dense_layers = (56, 40, 28, 12, 4)

    # 2. pick the parameters and the model class for the requested model
    if model_type is AEModel.AAE:
        ae_params = {
            # only to make preprocess_type part of the hash
            'preprocess_type': preprocess_type.value,
            'input_dim': sets_training_scaled[0].shape[1],  # 56
            'latent_dim': latent_dim,
            'hidden_layers': dense_layers,
            'hidden_layers_discriminator': (2, 2),
            'leaky_relu': 0.1,
            'last_activation': 'linear',
            'last_activation_discriminator': 'sigmoid',
            'loss_generator': 'mean_squared_error',
            'loss_discriminator': 'binary_crossentropy',
            'batch_size': 20,
            'epochs': 20000
        }
        model_class = AdversarialAutoencoder
    elif model_type is AEModel.VAE:
        ae_params = {
            # only to make preprocess_type part of the hash
            'preprocess_type': preprocess_type.value,
            'input_dim': sets_training_scaled[0].shape[1],  # 56
            'latent_dim': latent_dim,
            'hidden_layers': dense_layers,
            'leaky_relu': 0.1,
            'last_activation': 'linear',  # sigmoid or linear
            'loss': 'mean_squared_error',  # or binary_crossentropy
            'epsilon_std': 1.0,
            'batch_size': 20,
            'epochs': 100,
            'steps_per_epoch': 500
        }
        model_class = VariationalAutoencoder
    elif model_type is AEModel.AE:
        ae_params = {
            # only to make preprocess_type part of the hash
            'preprocess_type': preprocess_type.value,
            'input_dim': sets_training_scaled[0].shape[1],  # 56
            'latent_dim': latent_dim,
            'hidden_layers': dense_layers,
            'leaky_relu': 0.1,
            'loss': 'mse',
            'last_activation': 'linear',
            'batch_size': 20,
            'epochs': 100,
            'steps_per_epoch': 500
        }
        model_class = Autoencoder
    elif model_type is AEModel.PCA:
        ae_params = {
            # only to make preprocess_type part of the hash
            'preprocess_type': preprocess_type.value,
            'latent_dim': latent_dim
        }
        model_class = PCAModel
    else:  # AEModel.AE_WINDOWS, and every other value of model_type
        ae_params = {
            # (window_size, number of columns of the scaled data)
            'input_dim': (window_size, sets_training_scaled[0].shape[1]),
            'latent_dim': (2, 56),
            'hidden_layers': (12 * 56, 4 * 56),
            'leaky_relu': 0.1,
            'loss': 'mse',
            'last_activation': 'linear',
            'batch_size': 20,
            'epochs': 10,
            'steps_per_epoch': 500,
        }
        model_class = AutoencoderWindows

    # the hash of the sorted parameters identifies the stored model; it is
    # taken before the model is built from the parameters
    serialised_params = json.dumps(ae_params, sort_keys=True).encode('utf-8')
    ae_params_hash = hashlib.md5(serialised_params).hexdigest()
    autoencoder = model_class(ae_params, plot=False)

    # 3. train, or load a stored model when one exists
    model_name = "ae_" + ae_params_hash
    if force_training:
        autoencoder.train(all_training_scaled, sets_test_scaled, model_name)
    else:
        autoencoder.load_else_train(all_training_scaled, sets_test_scaled,
                                    model_name)

    # 4. encode data using the autoencoder
    sets_encoded_training = autoencoder.encode(sets_training_scaled)
    sets_encoded_test = autoencoder.encode(sets_test_scaled)

    # 5. decode the first encoded test set
    decoded_test = autoencoder.decode(sets_encoded_test[0])

    # 6. undo scaling, once without and once with curve smoothing
    first_test_name = test_dataset_names[0]
    simulated = preprocess.rescale_data(decoded_test,
                                        dataset_name=first_test_name)

    preprocess.enable_curve_smoothing = True
    simulated_smooth = preprocess.rescale_data(decoded_test,
                                               dataset_name=first_test_name)

    # 7. SMAPE of both reconstructions against the original test data
    smape_result = smape(simulated, np.array(sets_test[0]), over_curves=True)
    smape_result_smooth = smape(simulated_smooth,
                                np.array(sets_test[0]),
                                over_curves=True)

    print(np.mean(smape_result_smooth))

    if not plot or model_type is AEModel.AE_WINDOWS:
        return smape_result_smooth

    plot_prefix = preprocess_type.name + "_" + model_type.name

    plotting.plot_2d(sets_encoded_test[0],
                     plot_prefix + "_latent_space",
                     sets_test_scaled[0].index.values,
                     save=True)

    plotting.plot_some_curves(plot_prefix + "_in_vs_out", sets_test[0],
                              simulated, [25, 50, 75, 815], maturities)

    # smoothing is switched off again before the latent grid is drawn
    preprocess.enable_curve_smoothing = False
    if model_type is AEModel.VAE or model_type is AEModel.AAE:
        # the VAE exposes its decoder as generator_model, the AAE as decoder
        if model_type is AEModel.VAE:
            grid_decoder = autoencoder.generator_model
        else:
            grid_decoder = autoencoder.decoder
        plotting.plot_grid_2dim(maturities,
                                grid_decoder,
                                plot_prefix + "_latent_grid",
                                preprocess,
                                first_test_name,
                                n=6)

    return smape_result_smooth
예제 #19
0
from helpers.preprocess_data import PreprocessData
from helpers.evaluate import *
from helpers.plotting import Plotting
from imputance.gain_model import gain
import numpy as np
import matplotlib.pyplot as plt

if __name__ == '__main__':
    plotting = Plotting()
    preprocess = PreprocessData(PreprocessType.STANDARDISATION_OVER_TENORS,
                                short_end=True)
    preprocess2 = PreprocessData(PreprocessType.LOG_RETURNS_OVER_TENORS,
                                 short_end=True)
    sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess.get_data(
    )

    sets_encoded_log_training = preprocess2.scale_data(sets_training_scaled,
                                                       training_dataset_names,
                                                       should_fit=True)
    sets_encoded_log_test = preprocess2.scale_data(sets_test_scaled,
                                                   test_dataset_names,
                                                   should_fit=True)

    train = sets_encoded_log_training[0].copy()
    test = sets_encoded_log_test[0].copy()

    # print("train.shape[1]", train.shape[1])
    # print("sets_test_scaled[0]", sets_test_scaled[0].shape)
    # print("sets_encoded_log_test[0]", sets_encoded_log_test[0].shape)

    params = {
def simulate(plot=True):
    """Train a plain autoencoder on normalised data and return its error.

    The data is normalised to [0, 1]. An ``Autoencoder`` with a
    2-dimensional latent space is trained and saved under a name derived
    from the MD5 hash of its parameters. The first test set is encoded,
    decoded and rescaled, and ``reconstruction_error`` is evaluated against
    the original test data.

    Args:
        plot: when true, the latent space and several curve comparisons are
            plotted.

    Returns:
        The value returned by ``reconstruction_error`` for the first test
        set and its rescaled reconstruction.
    """
    plotting = Plotting()
    preprocess = PreprocessData()
    preprocess.enable_normalisation_scaler = True
    preprocess.feature_range = [0, 1]

    # 1. get data and apply normalisation
    (sets_training, sets_test, sets_training_scaled, sets_test_scaled,
     training_dataset_names, test_dataset_names,
     maturities) = preprocess.get_data()

    print("sets_training_scaled.shape", sets_training_scaled[0].shape)

    ae_params = {
        'input_dim': sets_training_scaled[0].shape[1],  # 56
        'latent_dim': 2,
        'hidden_layers': (56, 40, 28, 12, 4),
        'leaky_relu': 0.1,
        'loss': 'mse',
        'last_activation': 'linear',
        'batch_size': 20,
        'epochs': 100,
        'steps_per_epoch': 500
    }
    # the hash of the sorted parameters identifies the stored model
    serialised_params = json.dumps(ae_params, sort_keys=True).encode('utf-8')
    ae_params_hash = hashlib.md5(serialised_params).hexdigest()

    autoencoder = Autoencoder(ae_params)
    autoencoder.train(sets_training_scaled, sets_test_scaled)
    autoencoder.save_model("ae_" + ae_params_hash)
    # autoencoder.load_model("ae_" + ae_params_hash)

    # 2. encode every training and test set
    sets_encoded_training = [
        autoencoder.encode(scaled_set) for scaled_set in sets_training_scaled
    ]
    sets_encoded_test = [
        autoencoder.encode(scaled_set) for scaled_set in sets_test_scaled
    ]

    # 3. decode the first encoded test set
    decoded_test = autoencoder.decode(sets_encoded_test[0])

    # 4. undo the scaling, for now only for the first test set
    simulated = preprocess.rescale_data(decoded_test,
                                        dataset_name=test_dataset_names[0])

    # 5. reconstruction error on the unscaled data
    # reconstruction_error(sets_test_scaled[0], decoded_test)
    error = reconstruction_error(np.array(sets_test[0]), simulated)

    if not plot:
        return error

    plotting.plot_2d(sets_encoded_test[0],
                     "test_feature_normalised_encoded_autoencoder_on_",
                     save=True)

    # scaled input against the raw decoder output
    plotting.plot_some_curves("normalised_compare_ae_before_rescale",
                              sets_test_scaled[0], decoded_test,
                              [25, 50, 75, 815], maturities)

    # unscaled input against the rescaled reconstruction
    plotting.plot_some_curves("normalised_compare_ae", sets_test[0],
                              simulated, [25, 50, 75, 815], maturities)

    # unscaled against scaled input, drawn separately
    plotting.plot_some_curves("normalised_compare_ae",
                              sets_test[0],
                              sets_test_scaled[0],
                              [25, 50, 75, 815, 100, 600, 720, 740],
                              maturities,
                              plot_separate=True)

    return error
def simulate(latent_dim=2,
             preprocess_type1=None,
             preprocess_type2=None,
             ae_model=None,
             gan_model=None,
             force_training=True,
             plot=False):
    """Run one autoencoder -> GAN -> decode simulation and score it.

    The curves are scaled with ``preprocess_type1``, encoded by the selected
    autoencoder, scaled again with ``preprocess_type2`` and fed to a
    conditional GAN.  The generated samples are rescaled, decoded, rescaled
    once more and compared with the real test curves through ``smape``.

    Args:
        latent_dim: Latent dimension stored in the autoencoder parameters.
        preprocess_type1: Preprocessing for the raw curves.  Its ``.value``
            is read unconditionally, so ``None`` (the default) will fail.
        preprocess_type2: Preprocessing for the encoded data.
        ae_model: ``AEModel`` member; anything other than AAE, VAE or AE
            falls through to ``PCAModel``.
        gan_model: ``GANModel`` member; WGAN selects ``CWGANGP``, GAN_CONV
            selects ``GAN`` with the ``'conv'`` model type and anything else
            selects ``GAN`` with the ``'standard'`` model type.
        force_training: If true ``gan.train`` is called, otherwise
            ``gan.load_else_train``.
        plot: If true, the real curves, ten smoothed simulations and their
            log-return covariances are plotted.

    Returns:
        The ``smape`` result of the smoothed simulations against the real
        curves.
    """

    def _fingerprint(params):
        """Return the MD5 hex digest of the key-sorted JSON dump of params."""
        return hashlib.md5(
            json.dumps(params, sort_keys=True).encode('utf-8')).hexdigest()

    curve_scaler = PreprocessData(preprocess_type1)
    latent_scaler = PreprocessData(preprocess_type2)

    # 1. get data and apply scaling
    (sets_training, sets_test, sets_training_scaled, sets_test_scaled,
     training_dataset_names, test_dataset_names,
     maturities) = curve_scaler.get_data()

    # Layer widths shared by the three neural-network encoders below.
    encoder_widths = (56, 40, 28, 12, 4)

    # 'preprocess_type' is stored in every parameter dict only so that it
    # becomes part of the hash used as the model's file name.
    if ae_model is AEModel.AAE:
        autoencoder_cls = AdversarialAutoencoder
        ae_params = {
            'preprocess_type': preprocess_type1.value,
            'input_dim': sets_training_scaled[0].shape[1],  # 56
            'latent_dim': latent_dim,
            'hidden_layers': encoder_widths,
            'hidden_layers_discriminator': (2, 2),
            'leaky_relu': 0.1,
            'last_activation': 'linear',
            'last_activation_discriminator': 'sigmoid',
            'loss_generator': 'mean_squared_error',
            'loss_discriminator': 'binary_crossentropy',
            'batch_size': 20,
            'epochs': 20000,
        }
    elif ae_model is AEModel.VAE:
        autoencoder_cls = VariationalAutoencoder
        ae_params = {
            'preprocess_type': preprocess_type1.value,
            'input_dim': sets_training_scaled[0].shape[1],  # 56
            'latent_dim': latent_dim,
            'hidden_layers': encoder_widths,
            'leaky_relu': 0.1,
            'last_activation': 'linear',  # sigmoid or linear
            # binary_crossentropy or mean_square_error
            'loss': 'mean_square_error',
            'epsilon_std': 1.0,
            'batch_size': 20,
            'epochs': 100,
            'steps_per_epoch': 500,
        }
    elif ae_model is AEModel.AE:
        autoencoder_cls = Autoencoder
        ae_params = {
            'preprocess_type': preprocess_type1.value,
            'input_dim': sets_training_scaled[0].shape[1],  # 56
            'latent_dim': latent_dim,
            'hidden_layers': encoder_widths,
            'leaky_relu': 0.1,
            'loss': 'mse',
            'last_activation': 'linear',
            'batch_size': 20,
            'epochs': 100,
            'steps_per_epoch': 500,
        }
    else:  # elif ae_model is AEModel.PCA:
        autoencoder_cls = PCAModel
        ae_params = {
            'preprocess_type': preprocess_type1.value,
            'latent_dim': latent_dim,
        }

    ae_params_hash = _fingerprint(ae_params)
    autoencoder = autoencoder_cls(ae_params, plot=False)

    # 2. train/load autoencoder
    autoencoder.load_else_train(np.vstack(sets_training_scaled),
                                sets_test_scaled, "ae_" + ae_params_hash)

    # 2: encode data using autoencoder
    sets_encoded_training = autoencoder.encode(sets_training_scaled)
    sets_encoded_test = autoencoder.encode(sets_test_scaled)

    # 3: second scaling step (e.g. log returns) on the encoded data
    sets_encoded_log_training = latent_scaler.scale_data(
        sets_encoded_training, training_dataset_names, should_fit=True)
    sets_encoded_log_test = latent_scaler.scale_data(
        sets_encoded_test, test_dataset_names, should_fit=True)

    num_z = num_c = num_o = 6 * 7

    # Variant-specific GAN settings first, then the settings both share.
    if gan_model is GANModel.WGAN:
        gan_cls = CWGANGP
        gan_variant = {
            'gen_model_type': 'standard',  # conv
            'dis_model_type': 'standard',  # conv
            'batch_size': 32,
            'epochs': 10000,
            'sample_interval': 1000,
        }
    else:
        gan_cls = GAN  # try training on larger input and output
        # Anything that is not GAN_CONV is treated as the plain GAN.
        model_type = 'conv' if gan_model is GANModel.GAN_CONV else 'standard'
        gan_variant = {
            'gen_model_type': model_type,
            'dis_model_type': model_type,
            'batch_size': 128,
            'epochs': 20000,
        }

    gan_params = {
        'ae_params_hash': ae_params_hash,
        'num_tenors': sets_encoded_log_training[0].shape[1],
        'num_c': num_c,
        'num_z': num_z,
        'num_o': num_o,
        'gen_layers': (4 * (6 * 7 * 2), ),  # 4 * num_o * num_tenors
        'dis_layers': (4 * (6 * 7), ),  # 4 * num_o
        'gen_last_activation': 'tanh',
        'dis_last_activation': 'sigmoid',
        'loss': 'binary_crossentropy',
    }
    gan_params.update(gan_variant)
    gan_params_hash = _fingerprint(gan_params)
    gan = gan_cls(gan_params, plot=False)

    gan_name = "gan_" + gan_params_hash
    if force_training:
        gan.train(sets_encoded_log_training, gan_name)
    else:
        gan.load_else_train(sets_encoded_log_training, gan_name)

    # 4: simulate on encoded log returns, conditioned on test dataset
    num_simulations = 100
    num_repeats = 1
    generated, _ = gan.generate(condition=sets_encoded_log_test[-1],
                                condition_on_end=False,
                                num_simulations=num_simulations,
                                repeat=num_repeats)

    over_tenors = preprocess_type2 is PreprocessType.LOG_RETURNS_OVER_TENORS
    last_test_name = test_dataset_names[-1]

    # insert the last real futures curve in order to do rescaling
    if over_tenors:
        generated = np.insert(generated,
                              0,
                              sets_encoded_log_test[-1].iloc[num_c],
                              axis=1)

    # 5: undo scaling
    encoded_generated = latent_scaler.rescale_data(
        generated,
        start_value=sets_encoded_test[-1][num_c],
        dataset_name=last_test_name)
    if over_tenors:
        encoded_generated = encoded_generated[:, 1:]  # remove first curve again

    # 6: decode using autoencoder
    decoded_generated_segments = autoencoder.decode(encoded_generated)

    # 7: undo scaling, this can be log-returns
    simulated = curve_scaler.rescale_data(
        decoded_generated_segments,
        start_value=sets_test[-1].iloc[num_c],
        dataset_name=last_test_name)

    # Same rescaling again, this time with curve smoothing switched on.
    curve_scaler.enable_curve_smoothing = True
    simulated_smooth = curve_scaler.rescale_data(
        decoded_generated_segments,
        start_value=sets_test[-1].iloc[num_c],
        dataset_name=last_test_name)

    # `+1` because the log-returns also does +1; the same window is used
    # regardless of preprocess_type2.
    real = sets_test[-1].iloc[num_c:num_c + num_o * num_repeats + 1]

    print("simulated, real", simulated.shape, real.shape)

    smape_result = smape(simulated, real)
    smape_result_smooth = smape(simulated_smooth, real)

    print("smape_result_smooth mean and std:", np.mean(smape_result_smooth),
          np.std(smape_result_smooth))

    if not plot:
        return smape_result_smooth

    plotting = Plotting()
    plotting.plot_3d("real", real, show_title=False)

    cov_log_returns = cov_log_returns_over_tenors(real)
    plotting.plot_3d_cov("gan_real_cov", cov_log_returns, show_title=False)

    # Plot and score simulations 1..10 individually.
    for i in range(1, 11):
        one_simulation = simulated_smooth[i]
        plotting.plot_3d("gan_simulated_" + str(i),
                         one_simulation,
                         maturities=maturities,
                         time=real.index.values,
                         show_title=False)
        smape_result = smape(one_simulation, real)
        print("simulated_smooth[i], real", one_simulation.shape, real.shape)
        print("simulate rates", i)
        print("smape:", smape_result)
        print("=============\n")

        cov_log_returns = cov_log_returns_over_tenors(one_simulation)
        plotting.plot_3d_cov("gan_simulated_" + str(i) + "_cov",
                             cov_log_returns,
                             maturities=maturities,
                             show_title=False)

    return smape_result_smooth
def main():
    """Fit PCA on the normalised training curves and inspect the first test set.

    Loads the data through a ``PreprocessData`` instance with the
    normalisation scaler enabled, then runs ``pca_on_normalised``.  The
    ``pca_on_unnormalised`` variant is defined but not called.
    """
    plotting = Plotting()
    preprocess_normalisation = PreprocessData()
    preprocess_normalisation.enable_normalisation_scaler = True

    (sets_training, sets_test, sets_training_scaled, sets_test_scaled,
     training_dataset_names, test_dataset_names,
     maturities) = preprocess_normalisation.get_data()

    # The original author notes that an sklearn PCA(n_components=2) was
    # checked against the project's own model and gave the same result.

    def pca_on_normalised():
        """Encode/decode the first scaled test set with a 2-component PCA."""
        model = PCAModel({'latent_dim': 2})
        model.train(np.vstack(sets_training_scaled))

        scaled_test = sets_test_scaled[0]
        latent = model.encode(scaled_test)
        reconstructed = model.decode(latent)

        print("sets_test_scaled[0].shape", scaled_test.shape)
        print("test_data_scaled_encoded.shape", latent.shape)
        print("test_data_scaled_decoded.shape", reconstructed.shape)

        # plot results
        plotting.plot_2d(latent, "wti_nymex_encoded_pca")
        simulated = preprocess_normalisation.rescale_data(
            reconstructed, dataset_name=test_dataset_names[0])
        plotting.plot_some_curves("wti_nymex_normalised_compare_pca",
                                  sets_test[0], simulated,
                                  [25, 50, 75, 815], maturities)

        print("smape", smape(np.array(sets_test[0]), simulated))

    def pca_on_unnormalised():
        """Encode/decode the first raw test set with a 2-component PCA."""
        # NOTE(review): every other call site passes a params dict to
        # PCAModel; confirm that the ``k`` keyword is still accepted.
        model = PCAModel(k=2)
        model.train(np.vstack(sets_training))
        latent = model.encode(np.array(sets_test[0]))
        reconstructed = model.decode(latent)

        # plot results
        plotting.plot_2d(latent.T, "wti_nymex_pca")
        plotting.plot_some_curves("wti_nymex_compare_pca", sets_test[0],
                                  reconstructed, [25, 50, 75, 815],
                                  maturities)

    # Only the normalised variant is run.
    pca_on_normalised()
예제 #23
0
def simulate():
    """Reconstruct the first test set with a saved autoencoder and report error.

    Data is loaded with the normalisation scaler (feature range ``[0, 1]``),
    a previously saved ``Autoencoder`` is loaded by the hash of its
    parameters, and the first test set is encoded, decoded and rescaled.
    Comparison plots and ``reconstruction_error`` are produced twice: once
    without and once with curve smoothing enabled on the preprocessor.
    """
    plotting = Plotting()
    preprocess_normalisation = PreprocessData()
    preprocess_normalisation.enable_normalisation_scaler = True
    preprocess_normalisation.feature_range = [0, 1]

    # 1. get data and apply normalisation
    (sets_training, sets_test, sets_training_scaled, sets_test_scaled,
     training_dataset_names, test_dataset_names,
     maturities) = preprocess_normalisation.get_data()

    # Stacked views of the data; only the disabled training experiments
    # (kept as comments further down) make use of them.
    all_stacked = np.vstack((np.vstack(sets_training), np.vstack(sets_test)))
    all_stacked_scaled = np.vstack(
        (np.vstack(sets_training_scaled), np.vstack(sets_test_scaled)))
    all_training_scaled = np.vstack(sets_training_scaled)

    ae_params = {
        'input_dim': sets_training_scaled[0].shape[1],  # 56
        'latent_dim': 2,
        'hidden_layers': (56, 40, 28, 12, 4, 2),
        'leaky_relu': 0.1,
        'loss': 'mse',
        'last_activation': 'linear',
        'batch_size': 20,
        'epochs': 100,
        'steps_per_epoch': 500,
    }
    serialised_params = json.dumps(ae_params, sort_keys=True).encode('utf-8')
    ae_params_hash = hashlib.md5(serialised_params).hexdigest()

    # The model is loaded rather than trained; alternatives tried earlier:
    # autoencoder.train(all_stacked_scaled, sets_test_scaled)
    # autoencoder.train(sets_test_scaled[0], sets_test_scaled)
    # autoencoder.train(all_training_scaled, sets_test_scaled)
    # autoencoder.save_model("ae_" + ae_params_hash)
    autoencoder = Autoencoder(ae_params)
    autoencoder.load_model("ae_" + ae_params_hash)

    # 2: encode data using autoencoder
    sets_encoded_training = [
        autoencoder.encode(scaled_set) for scaled_set in sets_training_scaled
    ]
    sets_encoded_test = [
        autoencoder.encode(scaled_set) for scaled_set in sets_test_scaled
    ]

    first_encoded = sets_encoded_test[0]
    first_name = test_dataset_names[0]
    curve_ids = (25, 50, 75, 815)  # old: [25, 50, 75, 100, 600, 720, 740, 815]

    plotting.plot_2d(first_encoded,
                     "test_feature_normalised_encoded_autoencoder_on_",
                     save=True)

    # 6: decode using autoencoder
    decoded_test = autoencoder.decode(first_encoded)

    # 7: undo minimax, for now only the first simulation
    simulated = preprocess_normalisation.rescale_data(
        decoded_test, dataset_name=first_name)

    plotting.plot_some_curves(
        "test_feature_normalised_compare_autoencoder_before_rescale",
        sets_test_scaled[0], decoded_test, list(curve_ids), maturities)

    plotting.plot_some_curves(
        "test_feature_normalised_compare_autoencoder", sets_test[0],
        simulated, list(curve_ids), maturities)

    print("reconstruction error BEFORE smoothing:")
    reconstruction_error(np.array(sets_test[0]), simulated)

    # Repeat the rescaling with smoothing enabled on the preprocessor.
    preprocess_normalisation.enable_curve_smoothing = True
    simulated = preprocess_normalisation.rescale_data(
        decoded_test, dataset_name=first_name)

    plotting.plot_some_curves(
        "test_feature_normalised_compare_autoencoder", sets_test[0],
        simulated, list(curve_ids), maturities)

    # reconstruction error
    print("reconstruction error AFTER smoothing:")
    reconstruction_error(np.array(sets_test[0]), simulated)
예제 #24
0
    def __init__(self, params):
        """Keep the given parameters and create the config/plotting helpers."""
        self.params = params

        # Helper objects; both are constructed without arguments.
        self.config = Config()
        self.plotting = Plotting()
class AdversarialAutoencoder():
    """Adversarial autoencoder built from an encoder, decoder and discriminator.

    The encoder maps inputs to a latent code, the decoder reconstructs the
    input from that code, and the discriminator is trained to tell encoder
    outputs apart from samples of a standard normal distribution.
    """

    def __init__(self, params, plot=True):
        """Build and compile the three networks and the stacked model.

        Args:
            params: Dict of hyper-parameters.  Keys read by this class:
                ``input_dim``, ``latent_dim``, ``hidden_layers``,
                ``hidden_layers_discriminator``, ``leaky_relu``,
                ``last_activation``, ``last_activation_discriminator``,
                ``loss_generator``, ``loss_discriminator`` and, in
                ``train``, ``epochs`` and ``batch_size``.
            plot: Whether ``train`` plots the loss curves when it finishes.
        """
        self.config = Config()
        self.plotting = Plotting()
        self.params = params
        self.plot = plot

        self.input_dim = params['input_dim']
        self.latent_dim = params['latent_dim']

        optimizer = Adam(0.0002, 0.5)  # learning rate, beta_1

        # Build and compile the discriminator
        self.discriminator = self.build_discriminator(params)
        self.discriminator.compile(loss=params['loss_discriminator'],
                                   optimizer=optimizer,
                                   metrics=['accuracy'])

        # Build the encoder / decoder
        self.encoder = self.build_encoder(params)
        self.decoder = self.build_decoder(params)

        img = Input(shape=(self.input_dim, ))
        # The generator takes the input, encodes it and reconstructs it
        # from the encoding
        encoded_repr = self.encoder(img)
        reconstructed_img = self.decoder(encoded_repr)

        # For the adversarial_autoencoder model we will only train the generator
        self.discriminator.trainable = False

        # The discriminator determines validity of the encoding
        validity = self.discriminator(encoded_repr)

        # The adversarial_autoencoder model (stacked generator and
        # discriminator); the reconstruction loss dominates via loss_weights.
        self.adversarial_autoencoder = Model(img,
                                             [reconstructed_img, validity])
        self.adversarial_autoencoder.compile(
            loss=[params['loss_generator'], params['loss_discriminator']],
            loss_weights=[0.999, 0.001],
            optimizer=optimizer)

    def build_encoder(self, params):
        """Return the encoder model (input -> sampled latent representation)."""
        img = Input(shape=(self.input_dim, ))
        h = img
        for units in params['hidden_layers']:
            h = Dense(units)(h)
            h = LeakyReLU(alpha=params['leaky_relu'])(h)
        mu = Dense(self.latent_dim)(h)
        log_var = Dense(params['latent_dim'])(h)
        # Sample the latent code as mu + eps * exp(log_var / 2).  The second
        # positional argument of Lambda is its output-shape function.
        latent_repr = Lambda(
            lambda p: p[0] + K.random_normal(K.shape(p[0])) * K.exp(p[1] / 2),
            lambda p: p[0])([mu, log_var])

        model = Model(img, latent_repr)
        print("-" * 100, "\nencoder:")
        model.summary()
        return model

    def build_decoder(self, params):
        """Return the decoder model (latent code -> reconstructed input)."""
        z = Input(shape=(params['latent_dim'], ))
        h = z
        # Mirror the encoder: same hidden widths, in reverse order.
        for units in reversed(params['hidden_layers']):
            h = Dense(units)(h)
            h = LeakyReLU(alpha=params['leaky_relu'])(h)
        img = Dense(self.input_dim, activation=params['last_activation'])(h)

        model = Model(z, img)
        print("-" * 100, "\ndecoder:")
        model.summary()
        return model

    def build_discriminator(self, params):
        """Return the discriminator model (latent code -> validity score)."""
        encoded_repr = Input(shape=(self.latent_dim, ))
        h = encoded_repr
        h = Dense(self.latent_dim)(h)
        for units in params['hidden_layers_discriminator']:
            h = Dense(units)(h)
            h = LeakyReLU(alpha=params['leaky_relu'])(h)
        validity = Dense(
            1, activation=self.params['last_activation_discriminator'])(h)

        model = Model(encoded_repr, validity)
        print("-" * 100, "\ndiscriminator:")
        model.summary()
        return model

    def train(self,
              x_train,
              x_val,
              name=None,
              sample_interval=50,
              epochs=None,
              batch_size=None):
        """Alternate discriminator and generator updates on random batches.

        Args:
            x_train: Array of training rows; batches are drawn from it with
                replacement via random integer indexing.
            x_val: Accepted for interface compatibility; not used here.
            name: If given, the model is saved under this name afterwards.
            sample_interval: Print progress every this many iterations;
                ``None`` or ``-1`` disables progress printing.
            epochs: Number of training iterations (one batch each); defaults
                to ``params['epochs']``.
            batch_size: Batch size; defaults to ``params['batch_size']``.
        """
        if epochs is None:
            epochs = self.params['epochs']
        if batch_size is None:
            batch_size = self.params['batch_size']

        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        discriminator_loss = []
        discriminator_acc = []
        generator_loss = []
        generator_mse = []

        show_progress = sample_interval is not None and sample_interval != -1

        for epoch in range(epochs):

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random batch of rows
            idx = np.random.randint(0, x_train.shape[0], batch_size)
            imgs = x_train[idx]

            # Encoder outputs are the "fake" latent codes; samples from a
            # standard normal are the "real" ones.
            latent_fake = self.encoder.predict(imgs)
            latent_real = np.random.normal(size=(batch_size, self.latent_dim))

            d_loss_real = self.discriminator.train_on_batch(latent_real, valid)
            d_loss_fake = self.discriminator.train_on_batch(latent_fake, fake)
            # Element-wise mean of [loss, accuracy] over both batches.
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------

            g_loss = self.adversarial_autoencoder.train_on_batch(
                imgs, [imgs, valid])

            if show_progress and epoch % int(sample_interval) == 0:
                print("%d [D loss: %f, acc: %.2f%%] [G loss: %f, mse: %f]" %
                      (epoch, d_loss[0], 100 * d_loss[1], g_loss[0],
                       g_loss[1]))

            discriminator_loss.append(d_loss[0])
            discriminator_acc.append(d_loss[1])
            generator_loss.append(g_loss[0])
            # g_loss[1] is reported as "mse"; presumably the loss of the
            # first (reconstruction) output -- confirm against Keras.
            generator_mse.append(g_loss[1])

        # Final summary.  Skipped when no iteration ran (epochs == 0), and
        # reports the tracked accuracy rather than the loss scaled by 100.
        if discriminator_loss:
            print("[D loss: %f, acc: %.2f%%] [G loss: %f, mse: %f]" %
                  (discriminator_loss[-1], 100 * discriminator_acc[-1],
                   generator_loss[-1], generator_mse[-1]))

        if self.plot:
            self.plotting.plot_losses(discriminator_loss, generator_loss,
                                      generator_mse, "adversarial_losses")

        if name is not None:
            self.save_model(name)

    def save_model(self, name):
        """Save encoder, decoder and discriminator under ``name`` + suffix."""
        self.encoder.save(self.config.get_filepath_ae_model(name + "_encoder"))
        self.decoder.save(self.config.get_filepath_ae_model(name + "_decoder"))
        self.discriminator.save(
            self.config.get_filepath_ae_model(name + "_discriminator"))

    def load_model(self, name):
        """Load the three saved networks if all of their files exist.

        Returns:
            True when the models were loaded, False when at least one file
            is missing (nothing is loaded in that case).
        """
        encoder_filepath = self.config.get_filepath_ae_model(name + "_encoder")
        decoder_filepath = self.config.get_filepath_ae_model(name + "_decoder")
        discriminator_filepath = self.config.get_filepath_ae_model(
            name + "_discriminator")

        filepaths = (encoder_filepath, decoder_filepath,
                     discriminator_filepath)
        if not all(self.config.file_exists(path) for path in filepaths):
            print("trained model does not exist yet!")
            return False

        # NOTE(review): self.adversarial_autoencoder is not rebuilt here, so
        # it still wraps the networks created in __init__.
        self.encoder = load_model(encoder_filepath, compile=False)
        self.decoder = load_model(decoder_filepath, compile=False)
        self.discriminator = load_model(discriminator_filepath,
                                        compile=False)
        return True

    def load_else_train(self, x_train, x_val, name):
        """Load the model saved as ``name``; train and save it if missing."""
        did_load = self.load_model(name)
        if not did_load:
            self.train(x_train, x_val)
            self.save_model(name)

    def encode(self, data):
        """Encode ``data``; a list is encoded item by item into a list."""
        if isinstance(data, list):
            return [self.encoder.predict(item) for item in data]
        return self.encoder.predict(data)

    def decode(self, data):
        """Decode ``data``.

        If the data has three dimensions the first one is the number of
        simulations; each simulation is decoded separately and the results
        are stacked into one array.
        """
        # Compare by value: ``is`` on an int literal is an identity test.
        if len(data.shape) == 3:
            return np.array(
                [self.decoder.predict(data[i]) for i in range(data.shape[0])])
        return self.decoder.predict(data)
예제 #26
0
def simulate():
    """Simulate curves with a saved GAN on autoencoder-encoded log returns.

    Normalised data is encoded by an ``Autoencoder`` (loaded or trained),
    converted to log returns, and used as the condition for a previously
    saved ``GAN``.  The generated output is rescaled, decoded and rescaled
    again, and the ``smape`` against the real test curves is printed for the
    plain and the smoothed result.
    """
    plotting = Plotting()
    preprocess_normalisation = PreprocessData()
    preprocess_logreturns = PreprocessData()
    preprocess_normalisation.enable_normalisation_scaler = True
    preprocess_logreturns.enable_log_returns = True

    # 1. get data and apply pre-processing
    (sets_training, sets_test, sets_training_scaled, sets_test_scaled,
     training_dataset_names, test_dataset_names,
     maturities) = preprocess_normalisation.get_data()

    ae_params = {
        'preprocess_type': PreprocessType.NORMALISATION_OVER_TENORS.value,
        'input_dim': (10, sets_training_scaled[0].shape[1], ),  # 56
        'latent_dim': 2 * 56,
        'hidden_layers': (12 * 56, 4 * 56, ),
        'leaky_relu': 0.1,
        'loss': 'mse',
        'last_activation': 'linear',
        'batch_size': 5,
        'epochs': 5,
        'steps_per_epoch': 500,
    }
    ae_params_hash = hashlib.md5(
        json.dumps(ae_params, sort_keys=True).encode('utf-8')).hexdigest()

    autoencoder = Autoencoder(ae_params)
    autoencoder.load_else_train(sets_training_scaled, sets_test_scaled,
                                "ae_" + ae_params_hash)

    # 2: encode data using autoencoder
    sets_encoded_training = autoencoder.encode(sets_training_scaled)
    sets_encoded_test = autoencoder.encode(sets_test_scaled)

    print("sets_encoded_test", sets_encoded_test[0].shape)
    plotting.plot_2d(sets_encoded_test[0],
                     "encoded test data with deep autoencoder",
                     save=False)

    # 3: log returns of encoded data
    sets_encoded_log_training = preprocess_logreturns.scale_data(
        sets_encoded_training)
    sets_encoded_log_test = preprocess_logreturns.scale_data(
        sets_encoded_test)

    plotting.plot_2d(
        sets_encoded_log_test[0],
        "encoded test data with deep autoencoder, then log returns",
        save=False)

    num_c = 6 * 7
    num_o = 6 * 7
    gan_params = {
        'ae_params_hash': ae_params_hash,
        'num_tenors': sets_encoded_log_training[0].shape[1],
        'num_c': num_c,
        'num_z': 6 * 7,
        'num_o': num_o,
        'gen_model_type': 'standard',  # conv
        'dis_model_type': 'standard',  # conv
        'gen_layers': (4 * (6 * 7 * 2), ),  # 4 * num_o * num_tenors
        'dis_layers': (4 * (6 * 7), ),  # 4 * num_o
        'gen_last_activation': 'tanh',
        'dis_last_activation': 'sigmoid',
        'loss': 'binary_crossentropy',
        'batch_size': 128,
        'epochs': 20000,
    }
    gan_params_hash = hashlib.md5(
        json.dumps(gan_params, sort_keys=True).encode('utf-8')).hexdigest()

    # The GAN is only loaded here; training is expected to have happened
    # in an earlier run.
    gan = GAN(gan_params)  # try training on larger input and output
    gan.load_model("gan_" + gan_params_hash)

    # 4: simulate on encoded log returns, conditioned on test dataset
    num_simulations = 10
    num_repeats = 0
    condition_set = sets_encoded_log_test[-1]
    generated, _ = gan.generate(condition=condition_set,
                                condition_on_end=False,
                                num_simulations=num_simulations,
                                repeat=num_repeats)

    # insert the last real futures curve in order to do rescaling
    print("sets_encoded_log_test[-1][num_c] shape",
          condition_set.iloc[num_c].shape)
    print("generated_segments.shape", generated.shape)
    generated = np.insert(generated, 0, condition_set.iloc[num_c], axis=0)

    # 5: undo log-returns
    # todo: this start_value is actually one off! Error still persists...
    # autoencoder causing the difference?
    encoded_generated = preprocess_logreturns.rescale_data(
        generated, start_value=sets_encoded_test[-1][num_c])
    encoded_generated = encoded_generated[:, 1:]  # remove first curve again

    # 6: decode using autoencoder
    decoded_generated_segments = autoencoder.decode(encoded_generated)

    # 7: undo minimax, for now only the first simulation
    last_test_name = test_dataset_names[-1]
    simulated = preprocess_normalisation.rescale_data(
        decoded_generated_segments, dataset_name=last_test_name)

    # Same rescaling again with curve smoothing switched on.
    preprocess_normalisation.enable_curve_smoothing = True
    simulated_smooth = preprocess_normalisation.rescale_data(
        decoded_generated_segments, dataset_name=last_test_name)

    real = np.array(sets_test[-1])[num_c:num_c + num_o]

    print("simulated, real", simulated.shape, real.shape)

    smape_result = smape(simulated, real)
    smape_result_smooth = smape(simulated_smooth, real)
    print("smape_result and smooth", smape_result, smape_result_smooth)
    print("smape_resul_smooth", smape_result_smooth)
def simulate(latent_dim=2,
             preprocess_type1=None,
             preprocess_type2=None,
             ae_model=None,
             gan_model=None,
             force_training=True,
             plot=False):
    """Train (or load) a GAN on doubly-scaled short-end data and score it.

    The data is scaled with ``preprocess_type1``, then scaled again with
    ``preprocess_type2``. A GAN is trained on the result, simulations are
    generated conditioned on the last test set, both scalings are undone
    and the simulations are compared against the real data with ``smape``.

    Args:
        latent_dim: Not used by this function; kept for interface
            compatibility.
        preprocess_type1: Preprocess type for the first scaling step. Its
            ``.name`` attribute is read unconditionally, so the ``None``
            default raises ``AttributeError``.
        preprocess_type2: Preprocess type for the second scaling step. Its
            ``.name`` attribute is read unconditionally as well.
        ae_model: Not used by this function; kept for interface
            compatibility.
        gan_model: ``GANModel.WGAN`` selects ``CWGANGP``;
            ``GANModel.GAN_CONV`` selects ``GAN`` with ``'conv'`` layers;
            anything else selects ``GAN`` with ``'standard'`` layers.
        force_training: If true always call ``gan.train``, otherwise call
            ``gan.load_else_train``.
        plot: If true, plot the simulations next to the real data.

    Returns:
        The value returned by ``smape(sim, real, over_curves=True)``.
    """
    preprocess1 = PreprocessData(preprocess_type1, short_end=True)
    preprocess2 = PreprocessData(preprocess_type2, short_end=True)

    # 1. get data and apply scaling (first and last returned items are unused)
    (_, sets_test, sets_training_scaled, sets_test_scaled,
     training_dataset_names, test_dataset_names, _) = preprocess1.get_data()

    print("sets_test_scaled, sets_training_scaled:", sets_test_scaled[0].shape,
          sets_training_scaled[0].shape)

    # 2: second scaling step on top of the already scaled data
    sets_encoded_log_training = preprocess2.scale_data(sets_training_scaled,
                                                       training_dataset_names,
                                                       should_fit=True)
    # NOTE(review): the test sets are also scaled with should_fit=True --
    # confirm that re-fitting on test data is intended.
    sets_encoded_log_test = preprocess2.scale_data(sets_test_scaled,
                                                   test_dataset_names,
                                                   should_fit=True)

    num_c = 6 * 7
    num_o = 6 * 7

    # 3: pick the GAN flavour; both flavours share one parameter layout
    use_wgan = gan_model is GANModel.WGAN
    if use_wgan:
        model_type = 'standard'
        batch_size = 32
        epochs = 10000
    else:
        if gan_model is GANModel.GAN_CONV:
            model_type = 'conv'
        else:  # if gan_model is GANModel.GAN:
            model_type = 'standard'
        batch_size = 128
        epochs = 20000

        print("num tenors:", sets_encoded_log_training[0].shape[1])

    gan_params = {
        'short_end_encoding':
        preprocess_type1.name + "_" + preprocess_type2.name,
        'num_tenors': sets_encoded_log_training[0].shape[1],
        'num_c': num_c,
        'num_z': 6 * 7,
        'num_o': num_o,
        'gen_model_type': model_type,  # conv
        'dis_model_type': model_type,  # conv
        'gen_layers': (4 * (6 * 7 * 2), ),  # 4 * num_o * num_tenors
        'dis_layers': (4 * (6 * 7), ),  # 4 * num_o
        'gen_last_activation': 'tanh',
        'dis_last_activation': 'sigmoid',
        'loss': 'binary_crossentropy',
        'batch_size': batch_size,
        'epochs': epochs
    }
    if use_wgan:
        gan_params['sample_interval'] = 1000

    # The hash of the (key-sorted) parameters names the stored model, so
    # every distinct configuration gets its own model name.
    gan_params_hash = hashlib.md5(
        json.dumps(gan_params,
                   sort_keys=True).encode('utf-8')).hexdigest()
    gan_name = "gan_" + gan_params_hash

    if use_wgan:
        gan = CWGANGP(gan_params, plot=False)
    else:
        gan = GAN(gan_params,
                  plot=False)  # try training on larger input and output

    if force_training:
        gan.train(sets_encoded_log_training, gan_name)
    else:
        gan.load_else_train(sets_encoded_log_training, gan_name)

    # 4: simulate on encoded log returns, conditioned on test dataset
    num_simulations = 100
    num_repeats = 0

    print("sets_encoded_log_test[-1]", sets_encoded_log_test[-1].shape)

    generated, _ = gan.generate(condition=sets_encoded_log_test[-1],
                                condition_on_end=False,
                                num_simulations=num_simulations,
                                repeat=num_repeats)

    log_returns_over_tenors = (
        preprocess_type2 is PreprocessType.LOG_RETURNS_OVER_TENORS)

    # insert the last real futures curve in order to do rescaling
    if log_returns_over_tenors:
        generated = np.insert(generated,
                              0,
                              sets_encoded_log_test[-1].iloc[num_c],
                              axis=1)

    print("sets_test_scaled[-1]", sets_test_scaled[-1].shape)
    print("sets_test_scaled[-1][num_c]", sets_test_scaled[-1].iloc[num_c])

    # 5: undo scaling
    encoded_generated = preprocess2.rescale_data(
        generated,
        start_value=sets_test_scaled[-1].iloc[num_c],
        dataset_name=test_dataset_names[-1])
    if log_returns_over_tenors:
        encoded_generated = encoded_generated[:, 1:]  # remove first curve again

    # 7: undo scaling, this can be log-returns
    simulated = preprocess1.rescale_data(encoded_generated,
                                         start_value=sets_test[-1].iloc[num_c],
                                         dataset_name=test_dataset_names[-1])

    # `+1` because the log-returns also does +1; the original code took this
    # same slice for every preprocess type.
    horizon = num_o + 1
    real = np.array(sets_test[-1])[num_c:num_c + horizon]

    # One row per simulation. Derived from the locals above rather than the
    # former literal (100, 43), so the reshape follows num_simulations/num_o.
    # NOTE(review): assumes each simulation flattens to num_o + 1 values
    # (i.e. a single tenor) -- confirm.
    sim = simulated.reshape(num_simulations, horizon)

    print("sets_test[-1].iloc[num_c], sim[0][0]", sets_test[-1].iloc[num_c],
          sim[0][0], sim[1][0], sim[2][0])
    print("real, simulated", real.shape, sim.shape)

    smape_result = smape(sim, real, over_curves=True)

    if plot:
        condition_and_real = sets_test[-1].iloc[0:num_c + horizon]
        plotting = Plotting()
        plotting.plot_training_sample("simulated_simple",
                                      sim,
                                      condition_and_real,
                                      num_c,
                                      after_real_data=True)

    return smape_result