def train_portfolio_helper(filename, pf_returns, mktrf, zero_init):
    """Train the model on one portfolio and store its beta summary in SQL.

    The portfolio returns are merged with the market returns on ``date``,
    the trainer is fitted (with retries when the fitted parameters are
    flagged as overfitted), summary statistics of the resulting beta matrix
    are printed, and -- unless the beta itself is flagged as overfitted --
    a new row is inserted into ``beta_portfolios``.

    Args:
        filename: Name of the source file. It is stored with the results and
            selects the frequency passed to ``compute_beta``: ``'daily'``
            when the name contains ``'daily'``, otherwise ``'monthly'``.
        pf_returns: DataFrame with a ``date`` column; its second column holds
            the portfolio returns and that column's name is used as the
            portfolio name.
        mktrf: DataFrame with a ``date`` column and an ``rf`` column. The
            number of lags is inferred as ``len(mktrf.columns) - 3``.
        zero_init: Forwarded to ``Trainer`` on the first attempt; every
            retry after an overfitted fit uses ``False``.

    Returns:
        None. The function returns early, without printing or training, when
        there are fewer than ``(no_lags + 1) * 2`` merged observations.
    """
    # Drop missing returns (encoded as values below -99).
    idx_missing = pf_returns.iloc[:, 1] < -99
    pf_returns = pf_returns.loc[~idx_missing]
    merged = pd.merge(pf_returns, mktrf, on='date')

    # Retrieve portfolio info.
    pf_name = pf_returns.columns[1]
    no_obs = len(merged)
    no_lags = len(mktrf.columns) - 3

    # Return if the number of observations is not sufficient.
    if no_obs < (no_lags + 1) * 2:
        return

    print("")
    print("*************************************************")
    print("({})".format(datetime.today()))
    print("portfolio: {}".format(pf_name))
    print("no_obs: {}".format(no_obs))
    print("*************************************************")

    # 'smb' and 'hml' are used as-is; every other portfolio is taken in
    # excess of 'rf'.
    # NOTE(review): presumably because smb/hml are already long-short
    # returns -- confirm.
    if pf_name in ('smb', 'hml'):
        y_data = merged.loc[:, pf_name]
    else:
        y_data = merged.loc[:, pf_name] - merged.loc[:, 'rf']
    # `.values` replaces `as_matrix()`, which was removed in pandas 1.0.
    y_data = y_data.values.reshape(-1, 1)

    # Regressors: everything after (date, portfolio) except the 'rf' column.
    # `drop` returns a new frame instead of mutating a slice of `merged`.
    x_data = merged.iloc[:, 2:].drop('rf', axis=1).values

    # Train, retrying with a non-zero initialisation while the fitted
    # parameters are flagged as overfitted.
    # NOTE(review): the overfit check always uses freq="daily" even though
    # `compute_beta` below derives the frequency from `filename` -- confirm
    # that this is intended for monthly files.
    max_retries = 5
    params = None
    for _ in range(max_retries):
        trainer = Trainer(depth=2, width=1, no_inputs=x_data.shape[1],
                          zero_init=zero_init)
        trainer.run_ols_regression(x_data, y_data)
        candidate = trainer.train(x_data, y_data,
                                  x_tolerance=1e-6, cost_tolerance=1e-6)
        del trainer

        if not check_if_overfitted_by_param(
                candidate, freq="daily", no_lags=no_lags):
            params = candidate
            break

        zero_init = False
        print("Parameters are overfitted. Let's try again.")
        print("")

    # If parameters are still overfitted, replace them with OLS coefficients.
    if params is None:
        trainer = Trainer(depth=2, width=1, no_inputs=x_data.shape[1],
                          zero_init=True)
        trainer.run_ols_regression(x_data, y_data)
        params = trainer.flush_params_to_dict()

    # Compute beta and its summary statistics.
    freq = 'daily' if 'daily' in filename else 'monthly'
    beta = compute_beta(param=params, freq=freq, no_lags=no_lags)

    beta0 = beta[:, 0]              # first column of the beta matrix
    beta20 = np.sum(beta, axis=1)   # row-wise sum over all columns
    beta_average = np.mean(beta20)
    beta_delay = np.mean(beta20) - np.mean(beta0)
    # Average of the two end points minus the middle element.
    middle = int((len(beta20) - 1) / 2)
    beta_convexity = (beta20[0] + beta20[-1]) / 2 - beta20[middle]

    print("Results:")
    print(" - filename: {}, portfolio: {}".format(filename, pf_name))
    print(
        " - beta_average: {:.3f}, beta_delay: {:.3f}, beta_convexity: {:.3f}".
        format(beta_average, beta_delay, beta_convexity))
    print("")

    # Save the results to the SQL server only if beta is not overfitted.
    if check_if_overfitted_by_beta(beta):
        print("* Do not save the results because the beta is overfitted *")
        print("")
        return

    sql_loader = DataLoader(connect=True)
    try:
        # NOTE(review): these queries are built by string formatting. If
        # `filename` or the portfolio name can come from untrusted input,
        # switch to parameterised queries (the DataLoader API is not visible
        # here, so this is left unchanged).
        query = """
            select max(id)
            from beta_portfolios
            where filename = '{}'
              and portfolio = '{}'
              and lags = {}
        """.format(filename, pf_name, no_lags)
        max_id = sql_loader.sql_query_select(query)

        # max(id) over zero rows is NULL, which may surface as None or NaN.
        current_max = max_id.iloc[0, 0]
        if pd.isnull(current_max):
            next_id = 1
        else:
            next_id = int(current_max) + 1

        query = """
            insert into beta_portfolios
            VALUES ('{}', '{}', {}, {}, {}, now(), '{}', {}, {}, {})
        """.format(filename, pf_name, no_lags, next_id, no_obs,
                   json.dumps(params), beta_average, beta_delay,
                   beta_convexity)
        sql_loader.sql_query_commit(query)
    finally:
        # Always release the connection, even when a query fails.
        sql_loader.close()
def train_stock_returns_full_periods(permno_from, permno_to):
    """Train the model on every untouched stock in a permno range.

    For each row of ``beta_stocks_full_periods`` whose ``permno`` lies in
    ``[permno_from, permno_to]`` and whose ``touched`` is null, the stock's
    returns are merged with the daily market returns (10 lags), the trainer
    is fitted (with retries when the parameters are flagged as overfitted),
    and the row is updated with the parameters and the beta summary
    statistics. Stocks with fewer than 100 merged observations are skipped.

    Args:
        permno_from: Lower bound (inclusive) of the permno range.
        permno_to: Upper bound (inclusive) of the permno range.

    Returns:
        None.

    Raises:
        AssertionError: If the beta computed for a stock is flagged as
            overfitted by ``check_if_overfitted_by_beta``.
    """
    no_lags = 10

    print("***********************************")
    print(" Train Stock Returns")
    print(" permno_from: {}".format(permno_from))
    print(" permno_to : {}".format(permno_to))
    print(" no_lags : {}".format(no_lags))
    print("***********************************")

    loader = DataLoader(connect=True)
    total_training_no = 0
    total_training_time = 0

    try:
        print("Loading market returns...")
        mktrf = loader.load_market_returns('daily', no_lags=no_lags)

        print("Loading untouched firm list...")
        query = """
            select permno, no_obs
            from beta_stocks_full_periods
            where permno >= {}
              and permno <= {}
              and touched is null
            order by permno
        """.format(permno_from, permno_to)
        permno_list = loader.sql_query_select(query)
        if permno_list is None:
            print("Cannot find any untouched firm for permno between {} and {}".
                  format(permno_from, permno_to))
            return

        # Iterate over the values directly so the loop does not depend on
        # the frame having a default 0..n-1 index.
        permnos = permno_list['permno'].tolist()
        no_firms = len(permnos)

        for position, permno in enumerate(permnos, start=1):
            print(" ")
            print("*****************************************")
            print(datetime.today())
            print("permno: {} ({}/{})".format(permno, position, no_firms))
            print("*****************************************")

            t_start = time.time()

            # Load stock returns and align them with the market returns.
            print("Loading stock returns...")
            stock_rets = loader.load_stock_returns(permno)
            merged = pd.merge(mktrf, stock_rets, on='date')
            no_obs = len(merged)
            print("# observations: {}".format(no_obs))
            if no_obs < 100:
                print("Skip this stock due to the lack of observations (obs:{})".
                      format(no_obs))
                continue

            # Dependent variable: 'ret' scaled by 100, minus 'rf'.
            # NOTE(review): the scaling suggests 'ret' is a fraction while
            # 'rf' is in percent -- confirm against the loaders.
            y_data = merged.loc[:, 'ret'] * 100 - merged.loc[:, 'rf']
            # `.values` replaces `as_matrix()`, removed in pandas 1.0.
            y_data = y_data.values.reshape(-1, 1)

            # Regressors: all columns between the first ('date') and the
            # last, excluding 'rf'. `drop` avoids mutating a slice of
            # `merged`.
            x_data = merged.iloc[:, 1:-1].drop('rf', axis=1).values

            # Train, retrying with a non-zero initialisation while the
            # fitted parameters are flagged as overfitted.
            max_retries = 3
            zero_init = True
            params = None
            for _ in range(max_retries):
                trainer = Trainer(depth=2, width=1, no_inputs=no_lags + 1,
                                  zero_init=zero_init)
                trainer.run_ols_regression(x_data, y_data)
                candidate = trainer.train(x_data, y_data,
                                          x_tolerance=1e-2,
                                          cost_tolerance=1e-3)
                del trainer

                if not check_if_overfitted_by_param(
                        candidate, freq="daily", no_lags=no_lags):
                    params = candidate
                    break

                zero_init = False
                print("Parameters are overfitted. Let's try again.")
                print("")

            # If parameters are still overfitted, replace them with OLS
            # coefficients.
            if params is None:
                trainer = Trainer(depth=2, width=1, no_inputs=no_lags + 1,
                                  zero_init=True)
                trainer.run_ols_regression(x_data, y_data)
                params = trainer.flush_params_to_dict()

            # Compute beta and its summary statistics.
            beta = compute_beta(param=params, freq='daily', no_lags=no_lags)
            # NOTE(review): this check is stripped under `python -O`; kept
            # as an assert so the exception type seen by callers is
            # unchanged.
            assert (not check_if_overfitted_by_beta(beta))

            beta0 = beta[:, 0]              # first column of the beta matrix
            beta20 = np.sum(beta, axis=1)   # row-wise sum over all columns
            beta_average = np.mean(beta20)
            beta_delay = np.mean(beta20) - np.mean(beta0)
            # Average of the two end points minus the middle element.
            middle = int((len(beta20) - 1) / 2)
            beta_convexity = (beta20[0] + beta20[-1]) / 2 - beta20[middle]

            query = """
                update beta_stocks_full_periods
                set touched = now(),
                    parameters = '{}',
                    beta_average = {},
                    beta_delay = {},
                    beta_convexity = {}
                where permno = {}
            """.format(json.dumps(params), beta_average, beta_delay,
                       beta_convexity, permno)
            loader.sql_query_commit(query)

            elapsed = time.time() - t_start
            total_training_no += 1
            total_training_time += elapsed

            print("Results:")
            print(" - permno: {}, no_obs:{}".format(permno, no_obs))
            print(
                " - beta_average: {:.3f}, beta_delay: {:.3f}, beta_convexity: {:.3f}"
                .format(beta_average, beta_delay, beta_convexity))
            print("")
            print("Elapsed time: {:.3f} seconds".format(elapsed))

        print("")
        print("Total training no: {}".format(total_training_no))
        print("Total training time: {:.2f} minutes ({:.2f} hours)".format(
            total_training_time / 60, total_training_time / 3600))
        # The average is undefined when every stock was skipped or the list
        # was empty; the original divided by zero in that case.
        if total_training_no > 0:
            print("Average training time: {:.3f} seconds".format(
                total_training_time / total_training_no))
    finally:
        # Always release the connection, including on the early return and
        # when training or a query raises.
        loader.close()