def create_sample(self, *, caller_file: str) -> None:
    """
    Create bucketed regression sample from the input file.

    Reads ``{caller_name}.{self.input_file}.csv``, optionally keeps only the
    rows whose LOCATION is listed in ``self.countries``, and splits the first
    column of ``self.columns`` into buckets of width ``self.step`` between
    ``self.min`` and ``self.max``. For every non-empty bucket the mean of the
    first column and the mean and standard deviation of the second column
    are written to ``{caller_name}.bucket.csv``.

    Pass __file__ variable of the caller script as caller_file parameter.
    It will be used as both input and output file prefix.

    Raises RuntimeError if self.columns does not contain exactly two names.
    """
    if len(self.columns) != 2:
        raise RuntimeError("Scatter plot must specify two columns.")

    # Prefix shared by the input and the output file
    file_prefix = FileUtil.get_caller_name(caller_file=caller_file)
    x_name, y_name = self.columns[0], self.columns[1]

    data = pd.read_csv(f"{file_prefix}.{self.input_file}.csv")
    if self.countries is not None:
        data = data.loc[data['LOCATION'].isin(self.countries)]

    # np.arange excludes its stop value, the small tolerance is added so
    # that self.max can still appear as the last boundary
    edges = list(
        np.arange(self.min, self.max + 1e-10, self.step, dtype=float))

    x_means = []
    y_means = []
    y_std_devs = []
    for lower, upper in zip(edges[:-1], edges[1:]):
        # Bucket is closed on the left and open on the right
        in_bucket = (data[x_name] >= lower) & (data[x_name] < upper)

        # Nothing to aggregate for a bucket without points
        if not in_bucket.any():
            continue

        bucket = data[in_bucket]
        x_in_bucket = bucket[x_name].values
        y_in_bucket = bucket[y_name].values

        # NumPy array statistics (std uses the NumPy default settings)
        x_mean = x_in_bucket.mean()
        y_mean = y_in_bucket.mean()
        y_std_dev = y_in_bucket.std()

        x_means.append(x_mean)
        y_means.append(y_mean)
        y_std_devs.append(y_std_dev)

    # One row per non-empty bucket
    bucket_sample = pd.DataFrame({
        x_name: x_means,
        f"mean({y_name})": y_means,
        f"std_dev({y_name})": y_std_devs,
    })
    bucket_sample.to_csv(f"{file_prefix}.bucket.csv",
                         index=False,
                         float_format="%.6f")
def delete_sample(self, *, caller_file: str) -> None:
    """
    Remove the bucket sample file ``{caller_name}.bucket.csv``.

    Pass __file__ variable of the caller script as caller_file parameter.
    It will be used as the file prefix.

    The error raised by os.remove is propagated if the file cannot be removed.
    """
    file_prefix = FileUtil.get_caller_name(caller_file=caller_file)
    sample_path = f"{file_prefix}.bucket.csv"
    os.remove(sample_path)
def delete_plot(*, caller_file: str) -> None:
    """
    Remove the scatter plot image ``{caller_name}.sample.scatter.png``.

    Pass __file__ variable of the caller script as caller_file parameter.
    It will be used as the file prefix.

    The error raised by os.remove is propagated if the file cannot be removed.
    """
    file_prefix = FileUtil.get_caller_name(caller_file=caller_file)
    plot_path = f"{file_prefix}.sample.scatter.png"
    os.remove(plot_path)
def cleanup(self, *, caller_file: str) -> None:
    """
    Remove the short rate and term rate history files.

    Pass __file__ variable of the caller script as caller_file parameter.
    It will be used as the file prefix.

    Files are removed in order (short rate first); the first failure of
    os.remove is propagated and stops the cleanup.
    """
    file_prefix = FileUtil.get_caller_name(caller_file=caller_file)
    history_files = (
        f"{file_prefix}.history.short_rate.csv",
        f"{file_prefix}.history.term_rate.csv",
    )
    for history_file in history_files:
        os.remove(history_file)
def delete_plot(self, *, caller_file: str) -> None:
    """
    Delete plot file ``{caller_name}.{self.output_file.lower()}.png``.

    Pass __file__ variable of the caller script as caller_file parameter.
    It will be used as the file prefix.

    The method reads self.output_file and therefore must receive the
    instance; the error raised by os.remove is propagated if the file
    cannot be removed.
    """
    caller_name = FileUtil.get_caller_name(caller_file=caller_file)

    # File name uses the lowercased output file name
    file_name = f"{caller_name}.{self.output_file.lower()}.png"
    os.remove(file_name)
def save_plot(self, *, caller_file: str) -> None:
    """
    Draw monthly time series as line charts and save the figure as PNG.

    For every entry of ``self.input_files`` the file
    ``{caller_name}.{input_file}.csv`` is read, restricted to rows with
    FREQUENCY equal to 'M' (and to ``self.countries`` when it is set), and
    one line per LOCATION is added to the figure. The image is written to
    ``{caller_name}.{self.title.lower()}.png``.

    Pass __file__ variable of the caller script as caller_file parameter.
    It will be used as both input and output file prefix.
    """
    # Prefix shared by all data files
    file_prefix = FileUtil.get_caller_name(caller_file=caller_file)

    figure = go.Figure()

    for input_file in self.input_files:
        monthly = pd.read_csv(f'{file_prefix}.{input_file}.csv')
        monthly = monthly.loc[monthly['FREQUENCY'] == 'M']
        if self.countries is not None:
            monthly = monthly.loc[monthly['LOCATION'].isin(self.countries)]

        for country in monthly['LOCATION'].unique().tolist():
            # Rows of the current country, used for both axes
            country_rows = monthly.loc[monthly['LOCATION'] == country]
            months = [
                DateUtil.get_sequential_month(year_month=year_month)
                for year_month in country_rows['TIME']
            ]
            country_line = go.Scatter(x=months,
                                      y=country_rows['Value'],
                                      mode='lines',
                                      line=dict(width=3.0),
                                      name=input_file + "." + country)
            figure.add_trace(country_line)

    title_settings = {
        'text': self.title,
        'font': {'family': "Roboto", 'size': 18},
        'x': 0.5
    }
    x_axis_settings = dict(
        showgrid=True,
        tickangle=0,
        title={'text': "Month", 'font': {'family': "Roboto", 'size': 13}})
    y_axis_settings = dict(
        showgrid=True,
        tickformat='.2f',
        nticks=20,
        title={'text': "Value", 'font': {'family': "Roboto", 'size': 13}})
    figure.update_layout(margin=dict(l=80, r=20, t=80, b=40),
                         title=title_settings,
                         xaxis=x_axis_settings,
                         yaxis=y_axis_settings)

    # Output name is based on the lowercased plot title
    image_path = f"{file_prefix}.{self.title.lower()}.png"
    figure.write_image(image_path)
def save_plot(self, *, caller_file: str) -> None:
    """
    Plot bucket mean and standard deviation lines for every country.

    Reads ``{caller_name}.{self.input_file}.csv`` and, for each LOCATION
    (restricted to ``self.countries`` when it is set), adds two lines over
    the ``self.x_feature`` column: ``mean(<y_feature>)`` and
    ``std_dev(<y_feature>)``. The figure is titled with ``self.output_file``
    (or ``self.input_file`` when no output file is set), saved to
    ``{caller_name}.{output_file}.png`` and then shown via PlotUtil.

    Pass __file__ variable of the caller script as caller_file parameter.
    It will be used as both input and output file prefix.
    """
    # Prefix shared by all data files
    file_prefix = FileUtil.get_caller_name(caller_file=caller_file)

    figure = go.Figure()

    x_name = self.x_feature
    y_name = self.y_feature
    mean_column = f"mean({self.y_feature})"
    std_dev_column = f"std_dev({self.y_feature})"

    sample = pd.read_csv(f"{file_prefix}.{self.input_file}.csv")
    if self.countries is not None:
        sample = sample.loc[sample['LOCATION'].isin(self.countries)]

    # Two lines per country, in order of first appearance in the file
    for country in sample["LOCATION"].unique():
        country_rows = sample[sample["LOCATION"] == country]
        x_values = country_rows[x_name]

        mean_line = go.Scatter(x=x_values,
                               y=country_rows[mean_column],
                               name=f"mean({country})",
                               mode='lines',
                               marker={'color': 'yellow'})
        std_dev_line = go.Scatter(x=x_values,
                                  y=country_rows[std_dev_column],
                                  name=f"std_dev({country})",
                                  mode='lines',
                                  marker={'color': 'green'})
        figure.add_trace(mean_line)
        figure.add_trace(std_dev_line)

    # Fall back to the input file name when no output name is given
    if self.output_file is not None:
        output_file = self.output_file
    else:
        output_file = self.input_file

    figure.update_layout(
        title=output_file,
        xaxis=dict(showgrid=True, title={'text': x_name}),
        yaxis=dict(showgrid=True, title={'text': y_name}))

    # Save the image first, then display the figure
    image_path = f"{file_prefix}.{output_file}.png"
    PlotUtil.save_plot(figure, image_path)
    PlotUtil.show_plot(figure)
def create_sample(self, *, caller_file: str) -> None:
    """
    Create lag sample from history records of each feature.

    For every name in ``self.features`` the file
    ``{caller_name}.history.{feature}.csv`` is read, restricted to rows with
    FREQUENCY equal to "M" (and to ``self.countries`` when it is set) and
    merged into one table keyed by LOCATION and sequential month, with the
    value stored in column ``{feature}(t)``. When ``self.lag_months`` is not
    None, a second copy of each series is added under the column
    ``{feature}(t{shift_label})`` with its month reduced by
    ``self.lag_months``, so a row pairs each value with the value observed
    ``lag_months`` later. The Month column is dropped and the result is
    written to ``{caller_name}.lag_sample.csv``.

    Pass __file__ variable of the caller script as caller_file parameter.
    It will be used as both input and output file prefix.

    Raises RuntimeError if self.features is empty.
    """

    # Prefix for all data files
    caller_name = FileUtil.get_caller_name(caller_file=caller_file)

    # Accumulators for the merged unshifted and shifted series
    sample_df = None
    shifted_sample_df = None

    for feature in self.features:

        # Read time series for the feature
        time_series_df = pd.read_csv(
            f"{caller_name}.history.{feature}.csv")

        # Filter by monthly frequency
        time_series_df = time_series_df[time_series_df["FREQUENCY"] == "M"]

        # Filter by country if country list is specified
        if self.countries is not None:
            time_series_df = time_series_df[
                time_series_df["LOCATION"].isin(self.countries)]

        # Create sequential month list
        unshifted_months = [
            DateUtil.get_sequential_month(year_month=ym)
            for ym in time_series_df["TIME"]
        ]

        # Create DF with unshifted data
        values = time_series_df["Value"]
        location = time_series_df["LOCATION"]
        unshifted_df = pd.DataFrame({
            "LOCATION": location,
            "Month": unshifted_months,
            f"{feature}(t)": values.values
        })

        # Merge unshifted time series for the feature (pandas joins on the
        # columns the two frames have in common)
        if sample_df is None:
            sample_df = unshifted_df
        else:
            sample_df = sample_df.merge(unshifted_df)

        # Add features with the specified time shift if not None
        if self.lag_months is not None:

            # Create sequential month list shifted backwards(!)
            # by the specified time shift
            shifted_months = [
                m - self.lag_months for m in unshifted_months
            ]
            shift_label = DateUtil.get_lag_label(
                lag_months=self.lag_months)

            # Merge shifted data
            shifted_df = pd.DataFrame({
                "LOCATION": location,
                "Month": shifted_months,
                f"{feature}(t{shift_label})": values.values
            })
            if shifted_sample_df is None:
                shifted_sample_df = shifted_df
            else:
                shifted_sample_df = shifted_sample_df.merge(shifted_df)

    if sample_df is None:
        raise RuntimeError(
            "Lag sample requires at least one feature.")

    # Shifted columns exist only when a lag was requested; merging with
    # None would fail, so the step is skipped when there is no lag
    if shifted_sample_df is not None:
        sample_df = sample_df.merge(shifted_sample_df)

    # Drop the month column (LOCATION is kept in the output)
    sample_df = sample_df.drop(columns=["Month"])

    # Save sample to file
    sample_df.to_csv(f"{caller_name}.lag_sample.csv",
                     index=False,
                     float_format="%.6f")
def save_plot(self, *, caller_file: str) -> None:
    """
    Create migration plot from the initial and the shifted sample.

    Reads ``{caller_name}.{self.sample_type}sample.csv`` (and
    ``{caller_name}.{self.shifted_sample_type}sample.csv`` when the shifted
    sample type differs), optionally restricted to ``self.countries``, and
    draws:

    - initial points (``self.x_feature`` vs ``self.y_feature``),
    - final points (``self.x_shifted_feature`` vs ``self.y_shifted_feature``),
    - the configured initial point and the mean of the final points,
    - a red rectangle extending one unit in each direction around the
      initial point,
    - the curve returned by ``self.ellipse`` for the final points, shown
      under the name "two_sigma".

    The image is written to
    ``{caller_name}.{sample_type}{shifted_sample_type}sample.migration.png``
    with both axes fixed to the range [-1, 17].

    Pass __file__ variable of the caller script as caller_file parameter.
    It will be used as both input and output file prefix.
    """

    # Prefix for all data files
    caller_name = FileUtil.get_caller_name(caller_file=caller_file)

    fig = go.Figure()
    plot_title = "Sample Migration Plot"

    df = pd.read_csv(f"{caller_name}.{self.sample_type}sample.csv")
    if self.countries is not None:
        df = df.loc[df['LOCATION'].isin(self.countries)]

    fig.add_trace(
        go.Scatter(x=df[self.x_feature],
                   y=df[self.y_feature],
                   mode='markers',
                   marker=dict(size=10, color='blue', symbol='circle'),
                   name="initial_points",
                   opacity=0.9))

    # Reuse the already loaded sample when both sample types coincide
    df_shifted = df
    if self.sample_type != self.shifted_sample_type:
        df_shifted = pd.read_csv(
            f"{caller_name}.{self.shifted_sample_type}sample.csv")
        if self.countries is not None:
            df_shifted = df_shifted.loc[df_shifted['LOCATION'].isin(
                self.countries)]

    x_shifted = df_shifted[self.x_shifted_feature]
    y_shifted = df_shifted[self.y_shifted_feature]

    fig.add_trace(
        go.Scatter(x=x_shifted,
                   y=y_shifted,
                   mode='markers',
                   marker=dict(size=10, color='green', symbol='circle'),
                   name="final_points",
                   opacity=0.9))

    fig.add_trace(
        go.Scatter(x=[self.x_initial_point],
                   y=[self.y_initial_point],
                   mode='markers',
                   marker=dict(size=20, color='red', symbol='circle'),
                   name='initial_mean'))

    x_shifted_mean = np.mean(x_shifted)
    y_shifted_mean = np.mean(y_shifted)

    fig.add_trace(
        go.Scatter(x=[x_shifted_mean],
                   y=[y_shifted_mean],
                   mode='markers',
                   marker=dict(size=20, color='black', symbol='circle'),
                   name='final_mean'))

    # Rectangle around the initial point; the upper edge must be based on
    # the y coordinate (it was previously computed from the x coordinate)
    fig.add_shape(type="rect",
                  x0=self.x_initial_point - 1,
                  y0=self.y_initial_point - 1,
                  x1=self.x_initial_point + 1,
                  y1=self.y_initial_point + 1,
                  line=dict(color="red"))

    x_ellipse, y_ellipse = self.ellipse(x_shifted, y_shifted, 200)

    fig.add_trace(
        go.Scatter(x=x_ellipse,
                   y=y_ellipse,
                   name="two_sigma",
                   mode='lines',
                   line=dict(width=1, color='black')))

    fig.update_layout(margin=dict(l=80, r=20, t=80, b=40),
                      title={
                          'text': plot_title,
                          'font': {
                              'family': "Roboto",
                              'size': 18
                          },
                          'x': 0.5
                      },
                      xaxis=dict(showgrid=True,
                                 title={
                                     'text':
                                     ''.join([
                                         self.x_feature, " > ",
                                         self.x_shifted_feature
                                     ])
                                 }),
                      yaxis=dict(showgrid=True,
                                 title={
                                     'text':
                                     ''.join([
                                         self.y_feature, " > ",
                                         self.y_shifted_feature
                                     ])
                                 }),
                      legend=dict(xanchor="left"))

    # Save plot file
    file_name = f"{caller_name}.{self.sample_type}{self.shifted_sample_type}sample.migration.png"
    fig.update_xaxes(range=[-1, 17])
    fig.update_yaxes(range=[-1, 17])
    fig.write_image(file_name)
def simulate(self, *, caller_file: str) -> None:
    """
    Simulate monthly short rate paths and write them as a time series file.

    Each country's short rate starts from ``self.short_rate_0`` and is
    advanced month by month with a mean-reverting drift towards
    ``self.target`` (speed ``self.rev``) plus a normal shock scaled by
    ``self.vol``. Shocks come from ``np.random.RandomState(self.seed)``, so
    a fixed seed reproduces the same paths. ``12 * self.year_count`` monthly
    observations per country (including the initial state) are written, in
    percent, to ``{caller_name}.history.short_rate.csv`` with columns
    LOCATION, TIME, FREQUENCY, Value.

    Pass __file__ variable of the caller script as caller_file parameter.
    It will be used as output file prefix.
    """
    # Model parameters, one element per country
    sigma = np.array(self.vol, dtype=float)
    speed = np.array(self.rev, dtype=float)
    level = np.array(self.target, dtype=float)
    rate = np.array(self.short_rate_0, dtype=float)

    country_count = len(self.countries)

    # Initial state goes first, with rates converted to percent
    start_label = DateUtil.get_year_month(sim_month=0)
    rows = [
        (country, start_label, "M", rate_pct)
        for country, rate_pct in zip(self.countries, (100 * rate).tolist())
    ]

    # Legacy generator is used because its output stream is frozen and
    # therefore reproducible for a given seed
    rand = np.random.RandomState(self.seed)

    # Monthly step; factors that do not change between steps
    month_count = 12 * self.year_count
    dt = 1.0 / 12.0
    reversion_factor = 1 - np.exp(-speed * dt)
    shock_scale = sigma * np.sqrt(dt)

    for sim_month in range(1, month_count):
        # Mean reversion towards the target level
        drift = (level - rate) * reversion_factor

        # Independent standard normal draw for every country
        shock = shock_scale * rand.normal(0.0, 1.0, country_count)

        rate = rate + drift + shock

        # One row per country for the current month, rates in percent
        month_label = DateUtil.get_year_month(sim_month=sim_month)
        rows.extend(
            (country, month_label, "M", rate_pct)
            for country, rate_pct in zip(self.countries,
                                         (100 * rate).tolist()))

    # Column names follow the OECD data layout (all caps except for Value)
    # so that the same processing code can read simulated and real data
    history = pd.DataFrame(
        rows, columns=["LOCATION", "TIME", "FREQUENCY", "Value"])

    file_prefix = FileUtil.get_caller_name(caller_file=caller_file)
    history.to_csv(f"{file_prefix}.history.short_rate.csv",
                   index=False,
                   float_format="%.6f")
def simulate(self, *, caller_file: str) -> None:
    """
    Simulate monthly short rate and term rate paths and write both series.

    For every country the term rate reverts towards ``self.term_target``
    and the short rate reverts towards the term rate minus
    ``self.term_premium``; either rate gets an additional pull back when it
    is above ``self.soft_cap`` or below ``self.soft_floor``. The two shocks
    of a country are drawn from a bivariate normal with unit variances and
    correlation ``self.correlation[c]`` using
    ``np.random.RandomState(self.seed)``, so a fixed seed reproduces the
    same paths. ``12 * self.year_count`` monthly observations per country
    (including the initial state) are written, in percent, to
    ``{caller_name}.history.short_rate.csv`` and
    ``{caller_name}.history.term_rate.csv`` with columns LOCATION, TIME,
    FREQUENCY, Value.

    Pass __file__ variable of the caller script as caller_file parameter.
    It will be used as output file prefix.
    """
    # Model parameters, one element per country
    short_sigma = np.array(self.short_vol, dtype=float)
    term_sigma = np.array(self.term_vol, dtype=float)
    short_speed = np.array(self.short_rev, dtype=float)
    term_speed = np.array(self.term_rev, dtype=float)
    cap_speed = np.array(self.cap_rev, dtype=float)
    floor_speed = np.array(self.floor_rev, dtype=float)
    term_level = np.array(self.term_target, dtype=float)
    premium = np.array(self.term_premium, dtype=float)
    cap = np.array(self.soft_cap, dtype=float)
    floor = np.array(self.soft_floor, dtype=float)

    # Current state, starting from the initial rates
    short = np.array(self.short_rate_0, dtype=float)
    term = np.array(self.term_rate_0, dtype=float)

    country_count = len(self.countries)

    # Initial state goes first, with rates converted to percent
    start_label = DateUtil.get_year_month(sim_month=0)
    short_rows = [
        (country, start_label, "M", rate_pct)
        for country, rate_pct in zip(self.countries, (100 * short).tolist())
    ]
    term_rows = [
        (country, start_label, "M", rate_pct)
        for country, rate_pct in zip(self.countries, (100 * term).tolist())
    ]

    # Legacy generator is used because its output stream is frozen and
    # therefore reproducible for a given seed
    rand = np.random.RandomState(self.seed)

    # Zero mean and a 2x2 correlation matrix for every country
    zero_mean = np.zeros(2)
    shock_cov = [
        np.array([[1.0, self.correlation[c]],
                  [self.correlation[c], 1.0]], np.float64)
        for c in range(country_count)
    ]

    # Monthly step; factors that do not change between steps
    month_count = 12 * self.year_count
    dt = 1.0 / 12.0
    sqrt_dt = np.sqrt(dt)
    cap_factor = 1 - np.exp(-cap_speed * dt)
    floor_factor = 1 - np.exp(-floor_speed * dt)
    term_factor = 1 - np.exp(-term_speed * dt)
    short_factor = 1 - np.exp(-short_speed * dt)
    short_shock_scale = short_sigma * sqrt_dt
    term_shock_scale = term_sigma * sqrt_dt

    for sim_month in range(1, month_count):

        # Term rate: extra pull when outside the soft bounds, plus the
        # regular reversion to the target
        term_cap_pull = np.heaviside(term - cap, 0) * (cap - term) * cap_factor
        term_floor_pull = np.heaviside(floor - term, 0) * (floor - term) * floor_factor
        term_reversion = (term_level - term) * term_factor

        # Short rate: same soft bounds, regular reversion is towards the
        # term rate minus the premium
        short_cap_pull = np.heaviside(short - cap, 0) * (cap - short) * cap_factor
        short_floor_pull = np.heaviside(floor - short, 0) * (floor - short) * floor_factor
        short_reversion = (term - premium - short) * short_factor

        short_drift = short_cap_pull + short_floor_pull + short_reversion
        term_drift = term_cap_pull + term_floor_pull + term_reversion

        # One correlated (short, term) draw per country; check_valid makes
        # the generator raise on an invalid covariance matrix
        draws = np.zeros((country_count, 2))
        for c in range(country_count):
            draws[c] = rand.multivariate_normal(mean=zero_mean,
                                                cov=shock_cov[c],
                                                check_valid='raise')

        short_shock = short_shock_scale * draws[:, 0]
        term_shock = term_shock_scale * draws[:, 1]

        # Both drifts were computed from the state before this update
        short = short + short_drift + short_shock
        term = term + term_drift + term_shock

        # One row per country for the current month, rates in percent
        month_label = DateUtil.get_year_month(sim_month=sim_month)
        short_rows.extend(
            (country, month_label, "M", rate_pct)
            for country, rate_pct in zip(self.countries,
                                         (100 * short).tolist()))
        term_rows.extend(
            (country, month_label, "M", rate_pct)
            for country, rate_pct in zip(self.countries,
                                         (100 * term).tolist()))

    # Column names follow the OECD data layout (all caps except for Value)
    # so that the same processing code can read simulated and real data
    columns = ["LOCATION", "TIME", "FREQUENCY", "Value"]
    short_history = pd.DataFrame(short_rows, columns=columns)
    term_history = pd.DataFrame(term_rows, columns=columns)

    file_prefix = FileUtil.get_caller_name(caller_file=caller_file)
    short_history.to_csv(f"{file_prefix}.history.short_rate.csv",
                         index=False,
                         float_format="%.6f")
    term_history.to_csv(f"{file_prefix}.history.term_rate.csv",
                        index=False,
                        float_format="%.6f")
def get_caller_name_test(self):
    """Check that get_caller_name returns "file_util_test" for this file."""
    actual_name = FileUtil.get_caller_name(caller_file=__file__)
    assert actual_name == "file_util_test"
def save_plot(self, *, caller_file: str) -> None:
    """
    Create scatter plot of the sample with bucket regression lines.

    Reads ``{caller_name}.{self.input_file}.csv``, optionally keeps only the
    rows whose LOCATION is listed in ``self.countries``, and plots the second
    column of ``self.columns`` against the first one as markers. The first
    column is split into buckets of width ``self.step`` between ``self.min``
    and ``self.max``; the per-bucket mean and standard deviation of the
    second column are added as lines. The figure is saved to
    ``{caller_name}.{self.title.lower()}.png`` and then shown via PlotUtil.

    Pass __file__ variable of the caller script as caller_file parameter.
    It will be used as both input and output file prefix.

    Raises RuntimeError if self.columns does not contain exactly two names.
    """
    if len(self.columns) != 2:
        raise RuntimeError("Scatter plot must specify two columns.")

    # Prefix shared by the input and the output file
    file_prefix = FileUtil.get_caller_name(caller_file=caller_file)

    figure = go.Figure()

    x_name, y_name = self.columns[0], self.columns[1]

    data = pd.read_csv(f"{file_prefix}.{self.input_file}.csv")
    if self.countries is not None:
        data = data.loc[data['LOCATION'].isin(self.countries)]

    # np.arange excludes its stop value, the small tolerance is added so
    # that self.max can still appear as the last boundary
    edges = list(
        np.arange(self.min, self.max + 1e-10, self.step, dtype=float))

    x_means = []
    y_means = []
    y_std_devs = []
    for lower, upper in zip(edges[:-1], edges[1:]):
        # Bucket is closed on the left and open on the right
        in_bucket = (data[x_name] >= lower) & (data[x_name] < upper)

        # Nothing to aggregate for a bucket without points
        if not in_bucket.any():
            continue

        bucket = data[in_bucket]
        x_in_bucket = bucket[x_name].values
        y_in_bucket = bucket[y_name].values

        # NumPy array statistics (std uses the NumPy default settings)
        x_mean = x_in_bucket.mean()
        y_mean = y_in_bucket.mean()
        y_std_dev = y_in_bucket.std()

        x_means.append(x_mean)
        y_means.append(y_mean)
        y_std_devs.append(y_std_dev)

    # Raw points first, then the two regression lines
    points = go.Scatter(x=data[x_name],
                        y=data[y_name],
                        name="value",
                        mode='markers',
                        marker={
                            'color': 'blue',
                            'size': 3
                        })
    mean_line = go.Scatter(x=x_means,
                           y=y_means,
                           name="mean",
                           mode='lines',
                           marker={'color': 'yellow'})
    std_dev_line = go.Scatter(x=x_means,
                              y=y_std_devs,
                              name="std_dev",
                              mode='lines',
                              marker={'color': 'green'})
    for trace in (points, mean_line, std_dev_line):
        figure.add_trace(trace)

    figure.update_layout(title=self.title,
                         xaxis=dict(showgrid=True, title={'text': x_name}),
                         yaxis=dict(showgrid=True, title={'text': y_name}))

    # Save the image first, then display the figure
    image_path = f"{file_prefix}.{self.title.lower()}.png"
    PlotUtil.save_plot(figure, image_path)
    PlotUtil.show_plot(figure)