예제 #1
0
def cv_param_search(estimator, valid_portion=0.2, n_cv_folds=10, n_jobs=1):
    """
    Runs the estimator's cross-validated fit on the training part of the data
    and hands back whatever it selected.

    Args:
      estimator: object providing fit_by_cv(X, Y, n_folds=..., n_jobs=...)
      valid_portion: forwarded to train_valid_split; the validation part of
        the split is not used here
      n_cv_folds: passed to fit_by_cv as n_folds
      n_jobs: passed through to fit_by_cv

    Returns:
      the return value of estimator.fit_by_cv
    """
    # only the training frame is needed for the parameter search
    train_df, _ = train_valid_split(valid_portion)
    features, target = target_feature_split(train_df,
                                            'log_ret_1',
                                            filter_nan=True)
    return estimator.fit_by_cv(features,
                               target,
                               n_folds=n_cv_folds,
                               n_jobs=n_jobs)
def main():
    """
    Fits a mixture density network on the data frame returned by
    make_overall_eurostoxx_df() (target column 'log_ret_1') and calls its
    plot2d method, conditioned on the feature-wise mean of X.

    When the module-level flag PCA_FEATURES is truthy, the features are first
    passed through pca_comp with n_components=4.
    """
    # load data and separate the features from the target
    full_df = make_overall_eurostoxx_df()
    features, target = target_feature_split(full_df,
                                            'log_ret_1',
                                            filter_nan=True)
    X = np.array(features)
    Y = np.array(target)

    if PCA_FEATURES:
        X = pca_comp(X, n_components=4)

    # input dimensionality follows the (possibly reduced) feature matrix;
    # the target is one-dimensional
    n_inputs = X.shape[1]
    n_outputs = 1

    mdn_settings = dict(n_centers=20,
                        n_training_epochs=500,
                        random_seed=22,
                        x_noise_std=0.2,
                        y_noise_std=0.1)
    mdn = MixtureDensityNetwork('mdn_empirical_no_pca', n_inputs, n_outputs,
                                **mdn_settings)
    mdn.fit(X, Y)

    feature_mean = np.mean(X, axis=0)
    mdn.plot2d(x_cond=[feature_mean], ylim=(-0.02, 0.02))
def main():
    """
    Fits a mixture density network on all features and plots how the
    estimated density reacts when one feature at a time is shifted away from
    its mean by +/- 1 and +/- 2 standard deviations, while all other features
    are held at their mean.

    Writes one png per feature and a combined overview figure (png and pdf)
    below DATA_DIR/plots/feature_selection/.
    """
    # load data
    df = make_overall_eurostoxx_df()
    X, Y, features = target_feature_split(df,
                                          'log_ret_1',
                                          filter_nan=True,
                                          return_features=True)
    X, Y = np.array(X), np.array(Y)

    ndim_x, ndim_y = X.shape[1], 1

    mdn = MixtureDensityNetwork('mdn_empirical_no_pca',
                                ndim_x,
                                ndim_y,
                                n_centers=20,
                                n_training_epochs=10,
                                random_seed=28,
                                x_noise_std=0.2,
                                y_noise_std=0.1)
    mdn.fit(X, Y)

    X_mean = np.mean(X, axis=0)
    # per-feature standard deviation: the conditioning points below are
    # mean +/- k * std, so this must be np.std and not np.mean
    X_std = np.std(X, axis=0)

    # individual plots
    for i, feature in enumerate(features):
        # shift only feature i, keep every other feature at its mean
        factor = np.zeros(X_std.shape)
        factor[i] = X_std[i]
        x_cond = [
            X_mean - 2 * factor, X_mean - 1 * factor, X_mean,
            X_mean + 1 * factor, X_mean + 2 * factor
        ]
        mdn.plot2d(x_cond=x_cond, ylim=(-0.04, 0.04), show=False)
        # legend entries follow the order of x_cond
        plt.legend(
            ['mean-2*std', 'mean-1*std', 'mean', 'mean+1*std', 'mean+2*std'])
        plt.title(feature)
        fig_path = os.path.join(DATA_DIR,
                                'plots/feature_selection/' + feature + '.png')
        # saves the current pyplot figure - presumably the one plot2d just
        # drew; TODO confirm against plot2d
        plt.savefig(fig_path)

    # one large plot
    resolution = 100
    ncols = 3
    fig, axes = plt.subplots(nrows=5, ncols=ncols, figsize=(12, 16))
    y = np.linspace(-0.04, 0.04, resolution)

    n = 0
    for i, feature in enumerate(features):
        # skip the top right panel (flat index 2); it stays empty so the
        # figure legend can be placed there
        if n == 2:
            n += 1
        factor = np.zeros(X_std.shape)
        factor[i] = X_std[i]
        x_cond = [
            X_mean + 2 * factor, X_mean + 1 * factor, X_mean,
            X_mean - 1 * factor, X_mean - 2 * factor
        ]

        ax = axes[n // ncols][n % ncols]
        for x_point in x_cond:
            # repeat the conditioning point once per y value so that x and y
            # have the same number of rows
            x = np.array([x_point for _ in range(resolution)])
            z = mdn.pdf(x, y)
            ax.plot(y, z)

        ax.set_title(feature)
        n += 1

    axes[0][0].set_xlabel('log return')
    axes[0][0].set_ylabel('probability density log-returns')

    # make top right plot disappear
    axes[0, 2].tick_params(colors='white')
    for spine in axes[0, 2].spines.values():
        spine.set_color('white')

    # legend entries follow the order of x_cond in the loop above
    fig.legend(
        ['mean+2*std', 'mean+1*std', 'mean', 'mean-1*std', 'mean-2*std'],
        loc=(.77, 0.88))
    fig.tight_layout()

    fig_path = os.path.join(
        DATA_DIR,
        'plots/feature_selection/feature_selection_all_variables.png')
    fig.savefig(fig_path)

    fig_path = os.path.join(
        DATA_DIR,
        'plots/feature_selection/feature_selection_all_variables.pdf')
    fig.savefig(fig_path)
예제 #4
0
def empirical_evaluation(estimator,
                         valid_portion=0.2,
                         moment_r2=True,
                         eval_by_fc=False,
                         fit_by_cv=False):
    """
    Fits the estimator and, based on a left out validation split, computes the
    mean log-likelihood and the Root Mean Squared Error (RMSE) between
    realized and estimated mean and std

    Args:
      estimator: estimator object
      valid_portion: portion of dataset to be separated as validation set
      moment_r2: (bool) whether to compute the rmse of mean and std
      eval_by_fc: (bool) only checked in combination with fit_by_cv;
        eval_by_fc=True with fit_by_cv=False is not implemented
      fit_by_cv: (bool) if True (and eval_by_fc is False) the estimator is
        fitted via estimator.fit_by_cv with 5 folds instead of estimator.fit

    Returns:
      (mean_logli, mu_rmse, std_rmse, std_intraday_rmse) - the three rmse
      values are None when moment_r2 is False

    Raises:
      NotImplementedError: if eval_by_fc is True and fit_by_cv is False
    """

    # get data and split into train and valid set
    df_train, df_valid = train_valid_split(valid_portion)

    X_train, Y_train = target_feature_split(df_train,
                                            'log_ret_1',
                                            filter_nan=True)
    X_valid, Y_valid = target_feature_split(df_valid,
                                            'log_ret_1',
                                            filter_nan=True)

    # realized moments; the first row is dropped here and the last prediction
    # is dropped below - presumably to align each prediction with the values
    # realized one period later (TODO confirm)
    mu_realized = df_valid['log_ret_last_period'][1:]
    std_realized_intraday = np.sqrt(df_valid['RealizedVariation'][1:])

    # fit density model
    # NOTE(review): eval_by_fc=True together with fit_by_cv=True falls through
    # to the plain fit() branch - confirm that this is intended
    if eval_by_fc and not fit_by_cv:
        raise NotImplementedError
    elif not eval_by_fc and fit_by_cv:
        estimator.fit_by_cv(X_train, Y_train, n_folds=5)
    else:
        estimator.fit(X_train, Y_train)

    # compute avg. log likelihood
    mean_logli = np.mean(estimator.log_pdf(X_valid, Y_valid))

    if moment_r2:
        # predict mean and std
        mu_predicted, std_predicted = estimator.mean_std(X_valid,
                                                         n_samples=N_SAMPLES)
        mu_predicted = mu_predicted.flatten()[:-1]
        std_predicted = std_predicted.flatten()[:-1]

        # realized and predicted series must line up element by element
        assert mu_realized.shape == mu_predicted.shape
        assert std_realized_intraday.shape == std_predicted.shape

        # compute realized std as the absolute deviation of the realized
        # value from the predicted mean
        std_realized = np.abs(mu_predicted - mu_realized)

        # compute RMSE
        mu_rmse = np.sqrt(np.mean((mu_realized - mu_predicted)**2))
        std_rmse = np.sqrt(np.mean((std_realized - std_predicted)**2))
        std_intraday_rmse = np.sqrt(
            np.mean((std_realized_intraday - std_predicted)**2))
    else:
        mu_rmse, std_rmse, std_intraday_rmse = None, None, None

    return mean_logli, mu_rmse, std_rmse, std_intraday_rmse
예제 #5
0
def main():
    """
    Estimates the conditional moments (mean, variance, skewness, kurtosis) of
    a fitted mixture density network for every row of the data and plots them
    as time series.

    When the module-level flag COMPUTE_MOMENTS is truthy, the model is fitted,
    the moments are estimated and dumped as csv to dump_file_path. Otherwise
    the moments are read back from that csv file. In both cases the figure
    'moments_time_series.png' is written to dump_dir.
    """

    if COMPUTE_MOMENTS:
        # 1) load data
        df = make_overall_eurostoxx_df()

        X, Y, features = target_feature_split(df,
                                              'log_ret_1',
                                              filter_nan=True,
                                              return_features=True)
        X, Y = np.array(X), np.array(Y)
        ndim_x, ndim_y = X.shape[1], 1

        # 2) Fit density model
        mdn = MixtureDensityNetwork('mdn_empirical_no_pca',
                                    ndim_x,
                                    ndim_y,
                                    n_centers=20,
                                    n_training_epochs=10,
                                    random_seed=22,
                                    x_noise_std=0.2,
                                    y_noise_std=0.1)
        mdn.fit(X, Y)

        # 3) estimate moments
        n_samples = 10**7
        print('compute mean')
        mean = np.squeeze(mdn.mean_(x_cond=X, n_samples=n_samples))
        print('compute cov')
        cov = np.squeeze(mdn.covariance(x_cond=X, n_samples=n_samples))
        print('compute skewness')
        skew = mdn._skewness_mc(x_cond=X, n_samples=n_samples)
        print('compute kurtosis')
        kurt = mdn._kurtosis_mc(x_cond=X, n_samples=n_samples)

        # 4) save data
        # one column per moment; assumes the rows kept by filter_nan=True are
        # exactly the rows of df.dropna() - TODO confirm
        data = np.stack([mean, cov, skew, kurt], axis=-1)
        moments_df = pd.DataFrame(
            data=data,
            index=df.dropna().index,
            columns=['mean', 'variance', 'skewness', 'kurtosis'])
        print(moments_df)

        # dump csv
        if not os.path.exists(dump_dir):
            os.makedirs(dump_dir)
        moments_df.to_csv(dump_file_path)

    else:
        moments_df = pd.read_csv(dump_file_path, index_col=0)

    #5) plot moment timeseries
    fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(15, 20))

    x = moments_df.index
    for i in range(4):
        label = moments_df.columns[i]
        # positional column access; DataFrame.ix was removed in pandas 1.0
        y = moments_df.iloc[:, i]
        axes[i].plot(x, y)
        axes[i].set_title(label)
    plt.savefig(os.path.join(dump_dir, 'moments_time_series.png'))
    print("Saved figure")