Example #1
def plot_history(file_name):
    """Load a saved model and plot its validation-loss history."""
    file_name = "../data_corr_mid_2014/" + file_name
    model = nn.read_model(file_name)
    data = np.array(model.history['history']['val_loss'])
    data.shape = (data.shape[0], 1)
    du.plot_data(None, data)
    return data
Example #2
    def training_data(self, nb_samples, with_error=True, **kwargs):
        """Prepare nb_samples training samples.

        Samples are drawn from the model parameters, a term structure is
        generated with the help of PCA, and the set of swaptions is then
        evaluated to produce volatilities. Each sample has the form
        (x_swo, x_ir, y), where x_swo and x_ir are the future inputs for the
        supervised machine learning algorithm and y is the desired output.
        """
        #Draw random model parameters and IR curves
        if 'seed' in kwargs:
            np.random.seed(kwargs['seed'])
        else:
            np.random.seed(0)

        if 'history_start' in kwargs:
            history_start = kwargs['history_start']
            history_end = kwargs['history_end']
            history_part = None
        else:
            history_start = None
            history_end = None
            if 'history_part' in kwargs:
                history_part = kwargs['history_part']
            else:
                history_part = 0.4
        
        if 'save' in kwargs and kwargs['save']:
            if 'file_name' in kwargs:
                file_name = kwargs['file_name']
            else:
                file_name = sample_file_name(self, nb_samples, with_error, 
                                             history_start, history_end, 
                                             history_part)
            print('Saving to file %s' % file_name)
            
        (y, ir_draw, error_draw, dates) = self.__random_draw(nb_samples, 
                                                        with_error=with_error,
                                                        history_start=history_start,
                                                        history_end=history_end,
                                                        history_part=history_part)
        
        #Draw random dates
        date_index = np.random.randint(0, len(dates), nb_samples)
        dates = dates[date_index]
                                
        #Calculate volatilities according to different conditions
        nb_instruments = len(self.helpers)
        x_swo = np.zeros((nb_samples, nb_instruments), float_type)
        x_ir  = np.zeros((nb_samples, len(self._ircurve.axis(0))), float_type)
        if 'plot' in kwargs and kwargs['plot']:
            plot_ir = True
        else:
            plot_ir = False
            
        if 'threshold' in kwargs:
            threshold = kwargs['threshold']
        else:
            threshold = nb_instruments + 1
        indices = np.ones((nb_samples, ), dtype=bool)
        for row in range(nb_samples):
            if row % 1000 == 0:
                print('Processing sample %s' % row)
            #Set term structure
            try:
                (x_ir[row, :], curve) = self._ircurve.rebuild(dates[row], ir_draw[row, :])
                if plot_ir:
                    du.plot_data(self._ircurve.axis(0).values, x_ir[row, :])
                self._term_structure.linkTo(curve)
                self.model.setParams(ql.Array(y[row, :].tolist()))
                nb_nan_swo = 0
                for swaption in range(nb_instruments):
                    try:
                        NPV = self.helpers[swaption].modelValue()
                        vola = self.helpers[swaption].impliedVolatility(NPV, 1.0e-6, 1000, 0.0001, 2.50)
                        x_swo[row, swaption] = np.clip(vola - error_draw[row, swaption], 0., np.inf)
                    except RuntimeError as e:
                        print('Exception (%s) for (sample, maturity, length): (%s, %s, %s)' % (e, row, self._maturities[swaption], self._lengths[swaption]))
                        nb_nan_swo = nb_nan_swo + 1
                        if nb_nan_swo > threshold:
                            print('Throwing out sample %s' % row)
                            indices[row] = False
                            break
            except RuntimeError as e:
                print('Throwing out sample %s. Exception: %s' % (row, e))

        if not np.any(indices):
            raise RuntimeError('All samples were thrown out')
        
        if np.any(~indices):
            #Remove rows with too many nans
            x_swo = x_swo[indices, :]
            x_ir = x_ir[indices, :]
            y = y[indices, :]
            print('%s samples had too many nans' % np.sum(~indices))
        
        if 'save' in kwargs and kwargs['save']:
            if 'append' in kwargs and kwargs['append']:
                try:
                    x_swo_l = np.load(file_name + '_x_swo.npy')
                    x_ir_l = np.load(file_name + '_x_ir.npy')
                    y_l = np.load(file_name + '_y.npy')
                    x_swo = np.concatenate((x_swo_l, x_swo), axis=0)
                    x_ir = np.concatenate((x_ir_l, x_ir), axis=0)                
                    y = np.concatenate((y_l, y), axis=0)
                except Exception as e:
                    print(e)
            
            np.save(file_name + '_x_swo', x_swo)
            np.save(file_name + '_x_ir', x_ir)
            np.save(file_name + '_y', y)
        return (x_swo, x_ir, y)
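
A minimal usage sketch, not part of the original example: it assumes the inst helper module and the inst.g2 model dictionary shown in Example #4, and that the object returned by inst.get_swaptiongen exposes this training_data method.

# Hypothetical usage sketch; inst.get_swaptiongen and inst.g2 are taken from
# Example #4, everything else here is an assumption.
swo = inst.get_swaptiongen(inst.g2)
# Draw 1,000 samples with error adjustment, using the first 40% of the
# available history and a fixed seed for reproducibility.
x_swo, x_ir, y = swo.training_data(1000, with_error=True, seed=0,
                                   history_part=0.4)
print(x_swo.shape, x_ir.shape, y.shape)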
Example #3
def plot():
    #du.data_dir = "../data/"
    mark_read = 'adj_error_s'
    mark_write = 'adj_error_insample40_s'
    data_labels = ('Default Starting Point', 'Historical Starting Point',
                   'Feed-forward Neural Net')
    labels = ('225k_lr5em5_ex6_lay5_d20', )
    av_hist = np.empty((500, len(labels)))
    av_hist.fill(np.nan)
    max_len = 0
    for rank, label in enumerate(labels):
        dates, values, _, _ = get_fnn(mark_read + label)
        size = len(dates)
        origMeanError = values[:, 0].reshape((size, 1)) * 100
        histMeanError = values[:, 1].reshape((size, 1)) * 100
        origObjective = values[:, 3].reshape((size, 1))
        histObjective = values[:, 4].reshape((size, 1))
        meanErrorPrior = values[:, 2].reshape((size, 1)) * 100
        objective = values[:, 5].reshape((size, 1))
        mean_data = np.concatenate(
            (origMeanError, histMeanError, meanErrorPrior), axis=1)
        obje_data = np.concatenate((origObjective, histObjective, objective),
                                   axis=1)

        colors = ('#66c2a5', '#fc8d62', '#8da0cb')
        du.plot_data(dates,
                     mean_data,
                     figsize=(21, 12),
                     labels=data_labels,
                     save=du.data_dir + mark_write + label +
                     '_vola_error_fnn.eps',
                     colors=colors,
                     legend_fontsize=22,
                     legend_color='black',
                     xlabel_fontsize=22,
                     xlabel_color='black',
                     ylabel_fontsize=22,
                     ylabel_color='black',
                     xtick_fontsize=18,
                     xtick_color='black',
                     yticks_format='{:.2f} %',
                     ytick_fontsize=18,
                     ytick_color='black',
                     title='Average Volatility Error',
                     title_fontsize=26)
        du.plot_data(dates,
                     obje_data,
                     figsize=(21, 12),
                     labels=data_labels,
                     save=du.data_dir + mark_write + label +
                     '_npv_error_fnn.eps',
                     colors=colors,
                     legend_fontsize=22,
                     legend_color='black',
                     xlabel_fontsize=22,
                     xlabel_color='black',
                     ylabel_fontsize=22,
                     ylabel_color='black',
                     xtick_fontsize=18,
                     xtick_color='black',
                     yticks_format='{:.2f}',
                     ytick_fontsize=18,
                     ytick_color='black',
                     title='NPV Mean Square Error',
                     title_fontsize=26)

        _, _, val_hist, train_hist = get_fnn(mark_read + label)
        av_val = running_mean(val_hist, 30)
        av_hist[:av_val.shape[0], rank] = av_val
        if av_val.shape[0] > max_len:
            max_len = av_val.shape[0]

    av_hist = av_hist[:max_len, :]
    du.plot_data(None,
                 av_hist,
                 figsize=(22, 11),
                 labels=labels,
                 save=du.data_dir + mark_write + '_cross_validation_fnn.eps',
                 xlabel='Epoch',
                 legend_fontsize=22,
                 legend_color='black',
                 xlabel_fontsize=22,
                 xlabel_color='black',
                 ylabel_fontsize=22,
                 ylabel_color='black',
                 xtick_fontsize=18,
                 xtick_color='black',
                 yticks_format='{:.3f}',
                 ytick_fontsize=18,
                 ytick_color='black')
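
The plotting examples above call running_mean, whose definition is not included in these snippets. Below is a minimal sketch of a trailing moving-average helper that is consistent with how it is used here; the original implementation may differ.

# Hypothetical helper, assumed behaviour only: simple moving average over a
# trailing window; the result is shorter than the input by window - 1.
def running_mean(x, window):
    x = np.asarray(x, dtype=float).ravel()
    kernel = np.ones(window) / window
    return np.convolve(x, kernel, mode='valid')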
Example #4
def g2_objective_graph():
    mark_read_1 = 'adj_err_s'
    mark_read_2 = '_mse_lr_1.0e-04_ex6_lay9_d20_bn_res_1_rlr_5.0e-01_rlrmin_5.0e-06_rlrpat_10_estop_41'
    mark_write = 'history_adj_err_4m'
    data_labels = ('Simulated Annealing', 'Neural Network')
    # Full label set; overridden by the shorter subset assigned below
    labels = ('0.5_0-264', '0.5_44-308', '0.5_88-352', '0.5_132-396',
              '0.5_176-440', '0.5_220-484', '0.5_264-528', '0.5_308-572',
              '0.5_352-616', '0.99_396-660', '0.99_440-704', '0.99_484-748',
              '0.99_528-792', '0.99_572-836', '0.99_616-880')

    labels = ('0.5_0-264', '0.5_88-352', '0.5_176-440', '0.5_264-528',
              '0.5_352-616', '0.99_440-704', '0.99_528-792', '0.99_616-880')

    model_dict = inst.g2
    swo = inst.get_swaptiongen(model_dict)
    max_rank = len(labels) - 1
    prev = 0

    npv = None
    vola = None
    for rank, label in enumerate(labels):
        dates, values, _, _ = get_fnn(mark_read_1 + label + mark_read_2)
        if npv is None:
            npv = np.empty((dates.shape[0], len(data_labels)))
            npv.fill(np.nan)
            vola = np.empty((dates.shape[0], len(data_labels)))
            vola.fill(np.nan)
            out_of_sample = int(label.split('_')[1].split('-')[1])

        file_name = du.data_dir + 'swo_gbp_g2pp_nn_' + mark_read_1 + label + mark_read_2 + '.p'
        model = nn.read_model(file_name)
        if rank < max_rank:
            max_date = int(labels[rank + 1].split('_')[1].split('-')[1])
        else:
            max_date = -1

        #Objective prior
        npv[prev:max_date, 1], vola[prev:max_date, 1] = \
            swo.objective_values(model, prev, max_date)

        #Keep the better (smaller) of the history-based and default starting points
        temp = values[prev:max_date, 4]  #History
        temp3 = values[prev:max_date, 3]  #Default starting point
        filt = temp3 < temp
        temp[filt] = temp3[filt]
        npv[prev:max_date, 0] = temp

        temp_v = values[prev:max_date, 1]  #History
        temp3_v = values[prev:max_date, 0]  #Default starting point
        filt = temp3_v < temp_v
        temp_v[filt] = temp3_v[filt]
        vola[prev:max_date, 0] = temp_v

        prev = max_date

    vola *= 100
    #colors = ('#66c2a5', '#fc8d62', '#8da0cb')
    colors = ('#fc8d62', '#8da0cb')
    du.plot_data(dates,
                 npv,
                 figsize=(21, 12),
                 labels=data_labels,
                 save=du.data_dir + mark_write + '_npv_error_fnn.eps',
                 colors=colors,
                 legend_fontsize=22,
                 legend_color='black',
                 xlabel_fontsize=22,
                 xlabel_color='black',
                 ylabel_fontsize=22,
                 ylabel_color='black',
                 xtick_fontsize=18,
                 xtick_color='black',
                 yticks_format='{:.2f}',
                 ytick_fontsize=18,
                 ytick_color='black',
                 title='NPV Mean Square Error',
                 title_fontsize=26,
                 out_of_sample=out_of_sample)
    du.plot_data(dates,
                 vola,
                 figsize=(21, 12),
                 labels=data_labels,
                 save=du.data_dir + mark_write + '_vola_error_fnn.eps',
                 colors=colors,
                 legend_fontsize=22,
                 legend_color='black',
                 xlabel_fontsize=22,
                 xlabel_color='black',
                 ylabel_fontsize=22,
                 ylabel_color='black',
                 xtick_fontsize=18,
                 xtick_color='black',
                 yticks_format='{:.2f} %',
                 ytick_fontsize=18,
                 ytick_color='black',
                 title='Average Volatility Error',
                 title_fontsize=26,
                 out_of_sample=out_of_sample)
    temp = vola[:, 1] - vola[:, 0]
    temp = temp.reshape((temp.shape[0], 1))
    du.plot_data(dates,
                 temp,
                 figsize=(21, 12),
                 labels=None,
                 save=du.data_dir + mark_write + '_vola_diff_error_fnn.eps',
                 colors=colors,
                 legend_fontsize=22,
                 legend_color='black',
                 xlabel_fontsize=22,
                 xlabel_color='black',
                 ylabel_fontsize=22,
                 ylabel_color='black',
                 xtick_fontsize=18,
                 xtick_color='black',
                 yticks_format='{:.2f} %',
                 ytick_fontsize=18,
                 ytick_color='black',
                 title='Difference in Average Volatility Error',
                 title_fontsize=26,
                 out_of_sample=out_of_sample)

    return (npv, vola)
Example #5
def g2_plot_all():
    #du.data_dir = "../data/"
    mark_read_1 = 'adj_err_s'
    mark_read_2 = '_mse_lr_1.0e-04_ex6_lay9_d20_bn_res_1_rlr_5.0e-01_rlrmin_5.0e-06_rlrpat_10_estop_41'
    mark_write = 'history_adj_err'
    data_labels = ('Simulated Annealing', 'Neural Network')
    labels = ('0.5_0-264', '0.5_44-308', '0.5_88-352', '0.5_132-396',
              '0.5_176-440', '0.5_220-484', '0.5_264-528', '0.5_308-572',
              '0.5_352-616', '0.99_396-660', '0.99_440-704', '0.99_484-748',
              '0.99_528-792', '0.99_572-836', '0.99_616-880')
    #labels = ('0.5_0-264',
    #          '0.5_132-396',
    #          '0.5_308-572',
    #          '0.99_440-704',
    #          '0.99_572-836')
    #labels = ('0.5_0-264',)

    #              '0.5_264-528',
    npv = None
    vola = None
    out_of_sample = 264
    for rank, label in enumerate(labels):
        dates, values, _, _ = get_fnn(mark_read_1 + label + mark_read_2)
        if npv is None:
            npv = np.empty((dates.shape[0], len(data_labels)))
            npv.fill(np.nan)
            vola = np.empty((dates.shape[0], len(data_labels)))
            vola.fill(np.nan)

        lims = [int(x) for x in label.split('_')[1].split('-')]
        npv[lims[0]:, 1] = values[lims[0]:, 5]  #Objective prior
        temp = values[lims[0]:, 4]  #History
        temp3 = values[lims[0]:, 3]  #Default starting point
        filt = temp3 < temp
        temp[filt] = temp3[filt]
        npv[lims[0]:, 0] = temp

        vola[lims[0]:, 1] = values[lims[0]:, 2]
        temp_v = values[lims[0]:, 1]  #History
        temp3_v = values[lims[0]:, 0]  #Default starting point
        filt = temp3_v < temp_v
        temp_v[filt] = temp3_v[filt]
        vola[lims[0]:, 0] = temp_v

    vola *= 100
    #colors = ('#66c2a5', '#fc8d62', '#8da0cb')
    colors = ('#fc8d62', '#8da0cb')
    du.plot_data(dates,
                 npv,
                 figsize=(21, 12),
                 labels=data_labels,
                 save=du.data_dir + mark_write + '_npv_error_fnn.eps',
                 colors=colors,
                 legend_fontsize=22,
                 legend_color='black',
                 xlabel_fontsize=22,
                 xlabel_color='black',
                 ylabel_fontsize=22,
                 ylabel_color='black',
                 xtick_fontsize=18,
                 xtick_color='black',
                 yticks_format='{:.2f}',
                 ytick_fontsize=18,
                 ytick_color='black',
                 title='NPV Mean Square Error',
                 title_fontsize=26,
                 out_of_sample=out_of_sample)
    du.plot_data(dates,
                 vola,
                 figsize=(21, 12),
                 labels=data_labels,
                 save=du.data_dir + mark_write + '_vola_error_fnn.eps',
                 colors=colors,
                 legend_fontsize=22,
                 legend_color='black',
                 xlabel_fontsize=22,
                 xlabel_color='black',
                 ylabel_fontsize=22,
                 ylabel_color='black',
                 xtick_fontsize=18,
                 xtick_color='black',
                 yticks_format='{:.2f} %',
                 ytick_fontsize=18,
                 ytick_color='black',
                 title='Average Volatility Error',
                 title_fontsize=26,
                 out_of_sample=out_of_sample)
    temp = vola[:, 1] - vola[:, 0]
    temp = temp.reshape((temp.shape[0], 1))
    du.plot_data(dates,
                 temp,
                 figsize=(21, 12),
                 labels=None,
                 save=du.data_dir + mark_write + '_vola_diff_error_fnn.eps',
                 colors=colors,
                 legend_fontsize=22,
                 legend_color='black',
                 xlabel_fontsize=22,
                 xlabel_color='black',
                 ylabel_fontsize=22,
                 ylabel_color='black',
                 xtick_fontsize=18,
                 xtick_color='black',
                 yticks_format='{:.2f} %',
                 ytick_fontsize=18,
                 ytick_color='black',
                 title='Difference in Average Volatility Error',
                 title_fontsize=26,
                 out_of_sample=out_of_sample)
Example #6
def plot2():
    data_labels = ('Default Starting Point', 'FNN With Error Adjustment .15',
                   'FNN With Error Adjustment .2')
    dates, ad_values, ad_val, _ = get_fnn(middle='adj_error_s150k_d15')
    _, un_values, un_val, _ = get_fnn(middle='adj_error_s150k_d20')
    size = len(dates)
    origMeanError = ad_values[:, 0].reshape((size, 1)) * 100
    origObjective = ad_values[:, 3].reshape((size, 1))
    ad_mean_prior = ad_values[:, 2].reshape((size, 1)) * 100
    un_mean_prior = un_values[:, 2].reshape((size, 1)) * 100
    ad_obje_prior = ad_values[:, 5].reshape((size, 1))
    un_obje_prior = un_values[:, 5].reshape((size, 1))
    mean_data = np.concatenate((origMeanError, un_mean_prior, ad_mean_prior),
                               axis=1)
    obje_data = np.concatenate((origObjective, un_obje_prior, ad_obje_prior),
                               axis=1)

    colors = ('#66c2a5', '#fc8d62', '#8da0cb')
    du.plot_data(dates,
                 mean_data,
                 figsize=(22, 12),
                 labels=data_labels,
                 save=du.data_dir + 'vola_error_fnn_unadj_vs_adj_error.eps',
                 legend_fontsize=22,
                 legend_color='black',
                 colors=colors,
                 xlabel_fontsize=22,
                 xlabel_color='black',
                 ylabel_fontsize=22,
                 ylabel_color='black',
                 xtick_fontsize=18,
                 xtick_color='black',
                 yticks_format='{:.2f} %',
                 ytick_fontsize=18,
                 ytick_color='black')
    du.plot_data(dates,
                 obje_data,
                 figsize=(22, 12),
                 labels=data_labels,
                 save=du.data_dir + 'npv_error_fnn_unadj_vs_adj_error.eps',
                 legend_fontsize=22,
                 legend_color='black',
                 colors=colors,
                 xlabel_fontsize=22,
                 xlabel_color='black',
                 ylabel_fontsize=22,
                 ylabel_color='black',
                 xtick_fontsize=18,
                 xtick_color='black',
                 yticks_format='{:.2f}',
                 ytick_fontsize=18,
                 ytick_color='black')

    max_len = max(ad_val.shape[0], un_val.shape[0])
    av_hist = np.empty((max_len, 2))
    av_hist.fill(np.nan)
    av_val = running_mean(ad_val, 10)
    av_hist[:av_val.shape[0], 0] = av_val
    av_val = running_mean(un_val, 10)
    av_hist[:av_val.shape[0], 1] = av_val
    data_labels = ('With Error Adjustment', 'Without Error Adjustment')
    du.plot_data(None,
                 av_hist,
                 figsize=(22, 11),
                 labels=data_labels,
                 save=du.data_dir +
                 'cross_validation_fnn_unadj_vs_adj_error.eps',
                 xlabel='Epoch',
                 legend_fontsize=22,
                 legend_color='black',
                 xlabel_fontsize=22,
                 xlabel_color='black',
                 ylabel_fontsize=22,
                 ylabel_color='black',
                 xtick_fontsize=18,
                 xtick_color='black',
                 yticks_format='{:.2f}',
                 ytick_fontsize=18,
                 ytick_color='black')
Example #7
import numpy as np
import pymc3 as pm

import data_utils

data = data_utils.load_data('data/chat_counts_per_day.csv')
n = data.shape[0]

model = pm.Model()

with model:
    alpha = 1.0 / n
    lambda_1 = pm.Exponential('lambda_1', alpha)
    lambda_2 = pm.Exponential('lambda_2', alpha)
    tau = pm.DiscreteUniform('tau', lower=0, upper=n - 1)
    # Use lambda_1 before the switchpoint tau and lambda_2 after it,
    # consistent with the posterior summary loop below.
    lambda_ = pm.math.switch(tau > np.arange(n), lambda_1, lambda_2)
    observation = pm.Poisson('obs', lambda_, observed=data['count'].values)
    trace = pm.sample(1000, tune=20000)

texts_per_day = np.zeros(n)
for t in range(n):
    ix = t < trace['tau']
    texts_per_day[t] = (trace['lambda_1'][ix].sum() +
                        trace['lambda_2'][~ix].sum()) / trace['tau'].shape[0]

fig = data_utils.plot_data(data)
ax = fig.get_axes()[0]
ax.plot(data['date'], texts_per_day, c='red')
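
For reference, the per-day loop above can also be written in vectorized form. This is a sketch under the same assumptions (the trace object and variable names from this example), not part of the original snippet.

# Vectorized equivalent of the loop above: for each day t, average lambda_1
# over the posterior draws where the switchpoint has not yet occurred and
# lambda_2 over the remaining draws.
tau_samples = trace['tau']                      # shape: (n_draws,)
before = np.arange(n)[:, None] < tau_samples    # shape: (n, n_draws)
texts_per_day_vec = np.where(before,
                             trace['lambda_1'],
                             trace['lambda_2']).mean(axis=1)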