def plot_history(file_name):
    file_name = "../data_corr_mid_2014/" + file_name
    model = nn.read_model(file_name)
    data = np.array(model.history['history']['val_loss'])
    data = data.reshape((data.shape[0], 1))
    du.plot_data(None, data)
    return data
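#Hypothetical call (the file name is illustrative only; it follows the
#'swo_gbp_g2pp_nn_...p' pattern used in g2_objective_graph below):
#
#    val_loss = plot_history('swo_gbp_g2pp_nn_adj_error_s150k_d15.p')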
def training_data(self, nb_samples, with_error=True, **kwargs):
    #Prepares nb_samples by sampling from the model parameters and generating
    #a term structure with the use of PCA, and then evaluates the set of
    #swaptions to produce volatilities
    #The sample is of the form (x_swo, x_ir, y), where x_swo and x_ir are the
    #future input for the supervised machine learning algorithm, and y is
    #the desired output

    #Draw random model parameters and IR curves
    np.random.seed(kwargs.get('seed', 0))

    if 'history_start' in kwargs:
        history_start = kwargs['history_start']
        history_end = kwargs['history_end']
        history_part = None
    else:
        history_start = None
        history_end = None
        history_part = kwargs.get('history_part', 0.4)

    if 'save' in kwargs and kwargs['save']:
        if 'file_name' in kwargs:
            file_name = kwargs['file_name']
        else:
            file_name = sample_file_name(self, nb_samples, with_error,
                                         history_start, history_end,
                                         history_part)
        print('Saving to file %s' % file_name)

    (y, ir_draw, error_draw, dates) = self.__random_draw(
        nb_samples, with_error=with_error, history_start=history_start,
        history_end=history_end, history_part=history_part)

    #Draw random dates
    date_index = np.random.randint(0, len(dates), nb_samples)
    dates = dates[date_index]

    #Calculate volatilities according to different conditions
    nb_instruments = len(self.helpers)
    x_swo = np.zeros((nb_samples, nb_instruments), float_type)
    x_ir = np.zeros((nb_samples, len(self._ircurve.axis(0))), float_type)
    plot_ir = 'plot' in kwargs and kwargs['plot']
    threshold = kwargs.get('threshold', nb_instruments + 1)
    indices = np.ones((nb_samples,), dtype=bool)
    for row in range(nb_samples):
        if row % 1000 == 0:
            print('Processing sample %s' % row)
        try:
            #Set term structure
            (x_ir[row, :], curve) = self._ircurve.rebuild(dates[row],
                                                          ir_draw[row, :])
            if plot_ir:
                du.plot_data(self._ircurve.axis(0).values, x_ir[row, :])
            self._term_structure.linkTo(curve)
            self.model.setParams(ql.Array(y[row, :].tolist()))
            nb_nan_swo = 0
            for swaption in range(nb_instruments):
                try:
                    NPV = self.helpers[swaption].modelValue()
                    vola = self.helpers[swaption].impliedVolatility(
                        NPV, 1.0e-6, 1000, 0.0001, 2.50)
                    x_swo[row, swaption] = np.clip(
                        vola - error_draw[row, swaption], 0., np.inf)
                except RuntimeError as e:
                    print('Exception (%s) for (sample, maturity, length): (%s, %s, %s)'
                          % (e, row, self._maturities[swaption],
                             self._lengths[swaption]))
                    nb_nan_swo += 1
                    if nb_nan_swo > threshold:
                        print('Throwing out sample %s' % row)
                        indices[row] = False
                        break
        except RuntimeError as e:
            print('Throwing out sample %s. Exception: %s' % (row, e))
            indices[row] = False  #Mark the sample so it is actually dropped

    if not np.any(indices):
        raise RuntimeError('All samples were thrown out')

    if np.any(~indices):
        #Remove rows with too many nans
        x_swo = x_swo[indices, :]
        x_ir = x_ir[indices, :]
        y = y[indices, :]
        print('%s samples had too many nans' % np.sum(~indices))

    if 'save' in kwargs and kwargs['save']:
        if 'append' in kwargs and kwargs['append']:
            try:
                x_swo_l = np.load(file_name + '_x_swo.npy')
                x_ir_l = np.load(file_name + '_x_ir.npy')
                y_l = np.load(file_name + '_y.npy')
                x_swo = np.concatenate((x_swo_l, x_swo), axis=0)
                x_ir = np.concatenate((x_ir_l, x_ir), axis=0)
                y = np.concatenate((y_l, y), axis=0)
            except Exception as e:
                print(e)
        np.save(file_name + '_x_swo', x_swo)
        np.save(file_name + '_x_ir', x_ir)
        np.save(file_name + '_y', y)
    return (x_swo, x_ir, y)
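#Minimal usage sketch for training_data. It assumes a swaption-generator
#instance obtained as in g2_objective_graph below; the output file name is
#hypothetical, and the kwargs shown are exactly the ones handled above.
#
#    swo = inst.get_swaptiongen(inst.g2)
#    x_swo, x_ir, y = swo.training_data(150000, with_error=True, seed=0,
#                                       history_part=0.4,
#                                       save=True,
#                                       file_name='../data/g2_train_150k')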
def plot():
    #du.data_dir = "../data/"
    mark_read = 'adj_error_s'
    mark_write = 'adj_error_insample40_s'
    data_labels = ('Default Starting Point', 'Historical Starting Point',
                   'Feed-forward Neural Net')
    labels = ('225k_lr5em5_ex6_lay5_d20', )
    av_hist = np.empty((500, len(labels)))
    av_hist.fill(np.nan)
    max_len = 0
    for rank, label in enumerate(labels):
        dates, values, val_hist, train_hist = get_fnn(mark_read + label)
        size = len(dates)
        origMeanError = values[:, 0].reshape((size, 1)) * 100   #Default starting point
        histMeanError = values[:, 1].reshape((size, 1)) * 100   #Historical starting point
        meanErrorPrior = values[:, 2].reshape((size, 1)) * 100  #Neural net prior
        origObjective = values[:, 3].reshape((size, 1))
        histObjective = values[:, 4].reshape((size, 1))
        objective = values[:, 5].reshape((size, 1))
        mean_data = np.concatenate(
            (origMeanError, histMeanError, meanErrorPrior), axis=1)
        obje_data = np.concatenate(
            (origObjective, histObjective, objective), axis=1)
        colors = ('#66c2a5', '#fc8d62', '#8da0cb')
        du.plot_data(dates, mean_data, figsize=(21, 12), labels=data_labels,
                     save=du.data_dir + mark_write + label + '_vola_error_fnn.eps',
                     colors=colors, legend_fontsize=22, legend_color='black',
                     xlabel_fontsize=22, xlabel_color='black',
                     ylabel_fontsize=22, ylabel_color='black',
                     xtick_fontsize=18, xtick_color='black',
                     yticks_format='{:.2f} %', ytick_fontsize=18,
                     ytick_color='black', title='Average Volatility Error',
                     title_fontsize=26)
        du.plot_data(dates, obje_data, figsize=(21, 12), labels=data_labels,
                     save=du.data_dir + mark_write + label + '_npv_error_fnn.eps',
                     colors=colors, legend_fontsize=22, legend_color='black',
                     xlabel_fontsize=22, xlabel_color='black',
                     ylabel_fontsize=22, ylabel_color='black',
                     xtick_fontsize=18, xtick_color='black',
                     yticks_format='{:.2f}', ytick_fontsize=18,
                     ytick_color='black', title='NPV Mean Square Error',
                     title_fontsize=26)
        av_val = running_mean(val_hist, 30)
        av_hist[:av_val.shape[0], rank] = av_val
        if av_val.shape[0] > max_len:
            max_len = av_val.shape[0]
    av_hist = av_hist[:max_len, :]
    du.plot_data(None, av_hist, figsize=(22, 11), labels=labels,
                 save=du.data_dir + mark_write + '_cross_validation_fnn.eps',
                 xlabel='Epoch', legend_fontsize=22, legend_color='black',
                 xlabel_fontsize=22, xlabel_color='black',
                 ylabel_fontsize=22, ylabel_color='black',
                 xtick_fontsize=18, xtick_color='black',
                 yticks_format='{:.3f}', ytick_fontsize=18,
                 ytick_color='black')
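#get_fnn and running_mean are referenced throughout this file but defined
#elsewhere. get_fnn is assumed to return (dates, values, val_hist,
#train_hist) as unpacked above. Below is a minimal sketch of running_mean
#under the assumption that it is a plain trailing moving average; the real
#helper may differ.
def running_mean(x, window):
    #Cumulative-sum trick: each output point is the mean of `window`
    #consecutive values, so the result has len(x) - window + 1 entries
    x = np.asarray(x, dtype=float)
    cumsum = np.cumsum(np.insert(x, 0, 0.))
    return (cumsum[window:] - cumsum[:-window]) / float(window)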
def g2_objective_graph():
    mark_read_1 = 'adj_err_s'
    mark_read_2 = '_mse_lr_1.0e-04_ex6_lay9_d20_bn_res_1_rlr_5.0e-01_rlrmin_5.0e-06_rlrpat_10_estop_41'
    mark_write = 'history_adj_err_4m'
    data_labels = ('Simulated Annealing', 'Neural Network')
    labels = ('0.5_0-264', '0.5_44-308', '0.5_88-352', '0.5_132-396',
              '0.5_176-440', '0.5_220-484', '0.5_264-528', '0.5_308-572',
              '0.5_352-616', '0.99_396-660', '0.99_440-704', '0.99_484-748',
              '0.99_528-792', '0.99_572-836', '0.99_616-880')
    #Overridden: use every other calibration window from the full list above
    labels = ('0.5_0-264', '0.5_88-352', '0.5_176-440', '0.5_264-528',
              '0.5_352-616', '0.99_440-704', '0.99_528-792', '0.99_616-880')
    model_dict = inst.g2
    swo = inst.get_swaptiongen(model_dict)
    max_rank = len(labels) - 1
    prev = 0
    npv = None
    vola = None
    for rank, label in enumerate(labels):
        dates, values, _, _ = get_fnn(mark_read_1 + label + mark_read_2)
        if npv is None:
            npv = np.empty((dates.shape[0], len(data_labels)))
            npv.fill(np.nan)
            vola = np.empty((dates.shape[0], len(data_labels)))
            vola.fill(np.nan)
        out_of_sample = int(label.split('_')[1].split('-')[1])
        file_name = (du.data_dir + 'swo_gbp_g2pp_nn_' + mark_read_1 + label
                     + mark_read_2 + '.p')
        model = nn.read_model(file_name)
        if rank < max_rank:
            max_date = int(labels[rank + 1].split('_')[1].split('-')[1])
        else:
            max_date = -1
        #Objective prior
        npv[prev:max_date, 1], vola[prev:max_date, 1] = \
            swo.objective_values(model, prev, max_date)
        temp = values[prev:max_date, 4]     #History
        temp3 = values[prev:max_date, 3]    #Default starting point
        filt = temp3 < temp
        temp[filt] = temp3[filt]
        npv[prev:max_date, 0] = temp
        temp_v = values[prev:max_date, 1]   #History
        temp3_v = values[prev:max_date, 0]  #Default starting point
        filt = temp3_v < temp_v
        temp_v[filt] = temp3_v[filt]
        vola[prev:max_date, 0] = temp_v
        prev = max_date
    vola *= 100
    #colors = ('#66c2a5', '#fc8d62', '#8da0cb')
    colors = ('#fc8d62', '#8da0cb')
    du.plot_data(dates, npv, figsize=(21, 12), labels=data_labels,
                 save=du.data_dir + mark_write + '_npv_error_fnn.eps',
                 colors=colors, legend_fontsize=22, legend_color='black',
                 xlabel_fontsize=22, xlabel_color='black',
                 ylabel_fontsize=22, ylabel_color='black',
                 xtick_fontsize=18, xtick_color='black',
                 yticks_format='{:.2f}', ytick_fontsize=18,
                 ytick_color='black', title='NPV Mean Square Error',
                 title_fontsize=26, out_of_sample=out_of_sample)
    du.plot_data(dates, vola, figsize=(21, 12), labels=data_labels,
                 save=du.data_dir + mark_write + '_vola_error_fnn.eps',
                 colors=colors, legend_fontsize=22, legend_color='black',
                 xlabel_fontsize=22, xlabel_color='black',
                 ylabel_fontsize=22, ylabel_color='black',
                 xtick_fontsize=18, xtick_color='black',
                 yticks_format='{:.2f} %', ytick_fontsize=18,
                 ytick_color='black', title='Average Volatility Error',
                 title_fontsize=26, out_of_sample=out_of_sample)
    temp = vola[:, 1] - vola[:, 0]
    temp = temp.reshape((temp.shape[0], 1))
    du.plot_data(dates, temp, figsize=(21, 12), labels=None,
                 save=du.data_dir + mark_write + '_vola_diff_error_fnn.eps',
                 colors=colors, legend_fontsize=22, legend_color='black',
                 xlabel_fontsize=22, xlabel_color='black',
                 ylabel_fontsize=22, ylabel_color='black',
                 xtick_fontsize=18, xtick_color='black',
                 yticks_format='{:.2f} %', ytick_fontsize=18,
                 ytick_color='black',
                 title='Difference in Average Volatility Error',
                 title_fontsize=26, out_of_sample=out_of_sample)
    return (npv, vola)
def g2_plot_all():
    #du.data_dir = "../data/"
    mark_read_1 = 'adj_err_s'
    mark_read_2 = '_mse_lr_1.0e-04_ex6_lay9_d20_bn_res_1_rlr_5.0e-01_rlrmin_5.0e-06_rlrpat_10_estop_41'
    mark_write = 'history_adj_err'
    data_labels = ('Simulated Annealing', 'Neural Network')
    labels = ('0.5_0-264', '0.5_44-308', '0.5_88-352', '0.5_132-396',
              '0.5_176-440', '0.5_220-484', '0.5_264-528', '0.5_308-572',
              '0.5_352-616', '0.99_396-660', '0.99_440-704', '0.99_484-748',
              '0.99_528-792', '0.99_572-836', '0.99_616-880')
    #labels = ('0.5_0-264',
    #          '0.5_132-396',
    #          '0.5_308-572',
    #          '0.99_440-704',
    #          '0.99_572-836')
    #labels = ('0.5_0-264',)
    #          '0.5_264-528',
    npv = None
    vola = None
    out_of_sample = 264
    for rank, label in enumerate(labels):
        dates, values, _, _ = get_fnn(mark_read_1 + label + mark_read_2)
        if npv is None:
            npv = np.empty((dates.shape[0], len(data_labels)))
            npv.fill(np.nan)
            vola = np.empty((dates.shape[0], len(data_labels)))
            vola.fill(np.nan)
        lims = [int(x) for x in label.split('_')[1].split('-')]
        npv[lims[0]:, 1] = values[lims[0]:, 5]    #Objective prior
        temp = values[lims[0]:, 4]                #History
        temp3 = values[lims[0]:, 3]               #Default starting point
        filt = temp3 < temp
        temp[filt] = temp3[filt]
        npv[lims[0]:, 0] = temp
        vola[lims[0]:, 1] = values[lims[0]:, 2]
        temp_v = values[lims[0]:, 1]              #History
        temp3_v = values[lims[0]:, 0]             #Default starting point
        filt = temp3_v < temp_v
        temp_v[filt] = temp3_v[filt]
        vola[lims[0]:, 0] = temp_v
    vola *= 100
    #colors = ('#66c2a5', '#fc8d62', '#8da0cb')
    colors = ('#fc8d62', '#8da0cb')
    du.plot_data(dates, npv, figsize=(21, 12), labels=data_labels,
                 save=du.data_dir + mark_write + '_npv_error_fnn.eps',
                 colors=colors, legend_fontsize=22, legend_color='black',
                 xlabel_fontsize=22, xlabel_color='black',
                 ylabel_fontsize=22, ylabel_color='black',
                 xtick_fontsize=18, xtick_color='black',
                 yticks_format='{:.2f}', ytick_fontsize=18,
                 ytick_color='black', title='NPV Mean Square Error',
                 title_fontsize=26, out_of_sample=out_of_sample)
    du.plot_data(dates, vola, figsize=(21, 12), labels=data_labels,
                 save=du.data_dir + mark_write + '_vola_error_fnn.eps',
                 colors=colors, legend_fontsize=22, legend_color='black',
                 xlabel_fontsize=22, xlabel_color='black',
                 ylabel_fontsize=22, ylabel_color='black',
                 xtick_fontsize=18, xtick_color='black',
                 yticks_format='{:.2f} %', ytick_fontsize=18,
                 ytick_color='black', title='Average Volatility Error',
                 title_fontsize=26, out_of_sample=out_of_sample)
    temp = vola[:, 1] - vola[:, 0]
    temp = temp.reshape((temp.shape[0], 1))
    du.plot_data(dates, temp, figsize=(21, 12), labels=None,
                 save=du.data_dir + mark_write + '_vola_diff_error_fnn.eps',
                 colors=colors, legend_fontsize=22, legend_color='black',
                 xlabel_fontsize=22, xlabel_color='black',
                 ylabel_fontsize=22, ylabel_color='black',
                 xtick_fontsize=18, xtick_color='black',
                 yticks_format='{:.2f} %', ytick_fontsize=18,
                 ytick_color='black',
                 title='Difference in Average Volatility Error',
                 title_fontsize=26, out_of_sample=out_of_sample)
def plot2():
    data_labels = ('Default Starting Point', 'FNN With Error Adjustment .15',
                   'FNN With Error Adjustment .2')
    dates, ad_values, ad_val, _ = get_fnn(middle='adj_error_s150k_d15')
    _, un_values, un_val, _ = get_fnn(middle='adj_error_s150k_d20')
    size = len(dates)
    origMeanError = ad_values[:, 0].reshape((size, 1)) * 100
    origObjective = ad_values[:, 3].reshape((size, 1))
    ad_mean_prior = ad_values[:, 2].reshape((size, 1)) * 100
    un_mean_prior = un_values[:, 2].reshape((size, 1)) * 100
    ad_obje_prior = ad_values[:, 5].reshape((size, 1))
    un_obje_prior = un_values[:, 5].reshape((size, 1))
    mean_data = np.concatenate((origMeanError, un_mean_prior, ad_mean_prior),
                               axis=1)
    obje_data = np.concatenate((origObjective, un_obje_prior, ad_obje_prior),
                               axis=1)
    colors = ('#66c2a5', '#fc8d62', '#8da0cb')
    du.plot_data(dates, mean_data, figsize=(22, 12), labels=data_labels,
                 save=du.data_dir + 'vola_error_fnn_unadj_vs_adj_error.eps',
                 legend_fontsize=22, legend_color='black', colors=colors,
                 xlabel_fontsize=22, xlabel_color='black',
                 ylabel_fontsize=22, ylabel_color='black',
                 xtick_fontsize=18, xtick_color='black',
                 yticks_format='{:.2f} %', ytick_fontsize=18,
                 ytick_color='black')
    du.plot_data(dates, obje_data, figsize=(22, 12), labels=data_labels,
                 save=du.data_dir + 'npv_error_fnn_unadj_vs_adj_error.eps',
                 legend_fontsize=22, legend_color='black', colors=colors,
                 xlabel_fontsize=22, xlabel_color='black',
                 ylabel_fontsize=22, ylabel_color='black',
                 xtick_fontsize=18, xtick_color='black',
                 yticks_format='{:.2f}', ytick_fontsize=18,
                 ytick_color='black')
    max_len = max(ad_val.shape[0], un_val.shape[0])
    av_hist = np.empty((max_len, 2))
    av_hist.fill(np.nan)
    av_val = running_mean(ad_val, 10)
    av_hist[:av_val.shape[0], 0] = av_val
    av_val = running_mean(un_val, 10)
    av_hist[:av_val.shape[0], 1] = av_val
    data_labels = ('With Error Adjustment', 'Without Error Adjustment')
    du.plot_data(None, av_hist, figsize=(22, 11), labels=data_labels,
                 save=du.data_dir + 'cross_validation_fnn_unadj_vs_adj_error.eps',
                 xlabel='Epoch', legend_fontsize=22, legend_color='black',
                 xlabel_fontsize=22, xlabel_color='black',
                 ylabel_fontsize=22, ylabel_color='black',
                 xtick_fontsize=18, xtick_color='black',
                 yticks_format='{:.2f}', ytick_fontsize=18,
                 ytick_color='black')
import numpy as np
import pymc3 as pm

import data_utils

data = data_utils.load_data('data/chat_counts_per_day.csv')
n = data.shape[0]

model = pm.Model()
with model:
    alpha = 1.0 / n
    lambda_1 = pm.Exponential('lambda_1', alpha)
    lambda_2 = pm.Exponential('lambda_2', alpha)
    tau = pm.DiscreteUniform('tau', lower=0, upper=n - 1)
    #Rate is lambda_1 for days before the switchpoint tau, lambda_2 after
    lambda_ = pm.math.switch(tau > np.arange(n), lambda_1, lambda_2)
    observation = pm.Poisson('obs', lambda_, observed=data['count'].values)
    trace = pm.sample(1000, tune=20000)

#Posterior expected number of chats per day: average lambda_1 over samples
#where day t falls before the sampled switchpoint, lambda_2 otherwise
texts_per_day = np.zeros(n)
for t in range(n):
    ix = t < trace['tau']
    texts_per_day[t] = 1.0 * (trace['lambda_1'][ix].sum()
                              + trace['lambda_2'][~ix].sum()) / trace['tau'].shape[0]

fig = data_utils.plot_data(data)
ax = fig.get_axes()[0]
ax.plot(data['date'], texts_per_day, c='red')
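#Follow-up sketch: summarize the posterior. The switchpoint estimate is the
#posterior mode of tau, and the two Poisson rate means bracket the change in
#behavior.
tau_samples = trace['tau']
print('Most likely switch day: %d' % np.bincount(tau_samples).argmax())
print('Rate before: %.2f, rate after: %.2f chats/day'
      % (trace['lambda_1'].mean(), trace['lambda_2'].mean()))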