from assets_data_collection import read_in_assets_data
from setup_stock_data import (
    calc_mean_cov_matrix_and_size,
    convert_to_log_growth,
)

# Hierarchical clustering of assets: groups assets that behave alike (and
# separates those that do not) using the covariance matrix of their growth.

start_time = time.time()

# Load market data covering a seven year window that ends on `today`.
# The tickers are stocks from different sectors of the market.
example_assets = ['AAPL', 'H', 'Y', 'SNE', 'GS', 'K', 'NKE', 'LEVI']

# Pinned to the date the script was last run. Swapping in dt.date.today()
# may require changing parameters/tuning.
today = dt.date(2020, 9, 24)

# Seven years is approximated as 7 * 365 days (leap days are ignored).
lookback = dt.timedelta(days=7 * 365)
seven_years_ago = today - lookback

assets_data_path = directory + '/Data/2013to2019_assets_data_for_clustering.csv'
assets_data = read_in_assets_data(
    example_assets,
    seven_years_ago,
    today,
    True,
    assets_data_path,
)

# Turn the raw data into growth data, then summarise it as
# (mean growth per asset, covariance matrix, number of days).
assets_growth_data = convert_to_log_growth(assets_data)
mean_cov_matrix_and_size = calc_mean_cov_matrix_and_size(assets_growth_data)
stock_growth_means, cov_matrix, num_days = mean_cov_matrix_and_size

# Preprocessing: rescale the covariance matrix, keeping the asset column
# labels on the resulting frame.
scaler = StandardScaler()
scaled_cov_values = scaler.fit_transform(cov_matrix)
scaled_cov_matrix = pd.DataFrame(scaled_cov_values, columns=cov_matrix.columns)

# Prepare the figure that the dendrogram of the hierarchy is drawn on.
plt.figure(figsize=(10, 7))
plt.title("Assets Dendrogram")
plt.xlabel('Assets')
plt.ylabel('Distance')
# Performs Bayesian inference with Markov-Chain Monte-Carlo, using the
# No U-Turn Sampling algorithm, to estimate the posterior distribution of
# Goldman Sachs stock data based on today's data with a lookback of seven years.
# See the bayesian_model.png file for the stochastic model layout/assumptions.
# The stochastic model was designed to account for some aspects of the returns,
# but a truly accurate model would be far more complex. The purpose of this is
# just to demonstrate how to perform Bayesian inference with pymc3.

start_time = time.time()
debug = False

# Read in market data with 7 year lookback
example_assets = ['GS']
today = dt.date(2020, 9, 24)  # When I last ran the script
# today = dt.date.today()  # May require changing parameters/tuning
seven_years_ago = today - dt.timedelta(days=7 * 365)  # An approximation - doesn't account for leap years
asset_data = read_in_assets_data(
    example_assets,
    seven_years_ago,
    today,
    True,
    directory + '/Data/gs_time_series_7_years.csv',
)
# Keep only the growth series of the single asset being modelled.
asset_growth_data = convert_to_log_growth(asset_data)[example_assets[0]]

# Visualize returns
asset_mu = np.mean(asset_growth_data)
print("Average GS growth over the past 7 years: " + str(asset_mu))
plt.plot(asset_growth_data)
plt.title('GS Growth Over the Past 7 years')
plt.xlabel('Time')
# The plotted series is the growth data itself, not a variance, so the axis
# is labelled to match the title.
plt.ylabel('Growth')
plot_filename = directory + '/Graphs/gs_growth_data.png'
plt.savefig(plot_filename, bbox_inches='tight')
plt.close()  # Release the figure so later plots start from a clean canvas

# Distribution fitting to find the prior distribution