def showtable5():
    """Print a summary table for the beta-sorted portfolios.

    Rows are the time-series average, the Newey-West t-statistic, and the
    p-value for each portfolio column of the module-level
    ``beta_equal_annual`` DataFrame (its 'year' column is dropped).
    """
    means = beta_equal_annual.mean()
    # nwttest_1samp returns (statistics, p-values, ...); run once and reuse.
    res = nwttest_1samp(beta_equal_annual, 0)
    tstats = pd.Series(res[0], index=means.index)
    pvals = pd.Series(res[1], index=means.index)
    table = pd.concat([means, tstats, pvals], axis=1).T
    del table['year']
    table.index = ['Average', 't-statistic', 'p-value']
    table = table.applymap(lambda v: round(v, 2))
    print(table)
def trasecfee(feerate):
    '''
    Report long-short performance net of transaction costs for every factor.

    :param feerate: transaction fee rate (charged on both legs of the trade)
    :return: nothing is returned; results are only printed
    '''
    # One pass per factor return-series file (module-level ``factorlist``).
    for i in range(len(factorlist)):
        temp = pd.read_csv('../Database/returnseries/12/' + factorlist[i])
        long_short = np.array(temp['long-short'])
        # Net of fees: a long-short position pays the fee on both legs.
        afterLS = list(long_short - feerate*2)
        length = 12  # sliding-window length used when the series was built
        name = factorlist[i]
        T_value = []
        Mean = []
        p_value = []
        sharpratio = []
        Std = []
        TO = [afterLS]
        for l in TO:
            # Newey-West one-sample t-test against zero with 1 lag.
            t_test = nwttest_1samp(l, 0, L=1)
            # Annualized excess mean: 12x monthly mean minus annualized rf
            # (rf12 appears to be in percent, hence the /100 — TODO confirm).
            mean = np.average(l) * 12- rf12.mean().tolist()[0] * 12 / 100
            STD = np.std(l) * np.sqrt(12)  # annualized volatility
            sharp = (mean) / STD
            T_value.append(t_test.statistic)
            p_value.append(t_test.pvalue)
            Mean.append(mean)
            Std.append(STD)
            sharpratio.append(sharp)
        print(name, 'long-short')
        print('mean', Mean[0] / 12)
        print('t-statistic', '('+str(round(T_value[0],4))+')')
        # FF3/FF5 alphas of the net-of-fee long-short series.
        ff3 = pd.read_csv('../Database/ff3.csv')
        ff5 = pd.read_csv('../Database/ff5.csv')
        A = pd.DataFrame(afterLS, columns=['long-short'])
        M = pd.concat([A], axis=1)
        alpha3 = []
        t3 = []
        t5 = []
        alpha5 = []
        for i in range(1):
            # NOTE(review): this inner loop reuses the outer variable ``i``;
            # harmless because the outer ``for`` rebinds it, but fragile.
            X1 = ff3.iloc[length:, 1:]
            X2 = ff5.iloc[length:, 1:]
            # Drop the last two observations — presumably to align the return
            # sample with the factor data; TODO confirm.
            Y = M.iloc[:-2, i]
            Y.index = X1.index
            Y = Y - rf12.RF[:-1] / 100  # excess return over monthly rf
            used1 = {'X': X1, 'Y': Y}
            # HAC (Newey-West) standard errors with 1 lag.
            reg = smf.ols(formula='Y~1+X', data=used1).fit(cov_type='HAC', cov_kwds={'maxlags': 1})
            t3.append(reg.tvalues[0])
            alpha3.append(reg.params[0] * 12)  # annualized intercept
            used2 = {'X': X2, 'Y': Y}
            reg = smf.ols(formula='Y~1+X', data=used2).fit(cov_type='HAC', cov_kwds={'maxlags': 1})
            t5.append(reg.tvalues[0])
            alpha5.append(reg.params[0] * 12)
        print('alpha-FF3', alpha3[0]/12)
        print('t-statistic', '('+str(round(t3[0],4))+')')
        print('alpha-FF5', alpha5[0]/12,)
        print('t-statistic','('+str(round(t5[0],4))+')')
        print('sharpe', sharpratio[0])
        gc.collect()
        print('*'*30)  # separator between different return series
def returnseriestest(length):
    """Compare every algorithm's return series to the OLS and DFN benchmarks.

    For each CSV under ..\\DataBase\\returnseries\\<length>, runs Newey-West
    one-sample t-tests on the difference of the long-short, long, and short
    series versus the corresponding OLS/DFN series, printing the statistics.
    """
    folder = r'..\DataBase\returnseries' + '\\' + str(length)
    files = glob.glob(os.path.join(folder, "*.csv"))
    ols = pd.read_csv(folder + '\\OLS' + str(length) + '.csv')
    dfn = pd.read_csv(folder + '\\DFN' + str(length) + '.csv')
    # One DataFrame per algorithm, in file order.
    frames = [pd.read_csv(f) for f in files]
    series_cols = ((1, 'long-short'), (2, 'long'), (3, 'short'))
    # Difference of each algorithm vs OLS.
    for path, frame in zip(files, frames):
        stats = [
            nwttest_1samp(frame.iloc[:, col] - ols[label], 0).statistic
            for col, label in series_cols
        ]
        print('ols-' + path[27:-4], stats)
    # Difference of DFN vs each algorithm.
    for path, frame in zip(files, frames):
        stats = [
            nwttest_1samp(dfn[label] - frame.iloc[:, col], 0).statistic
            for col, label in series_cols
        ]
        print('dfn-' + path[27:-4], stats)
    return
def showtable8():
    """Print portfolio means with NW t-stats for beta, MktCap, and BM sorts.

    Each of the three module-level annual DataFrames contributes two rows:
    the time-series average and the Newey-West t-statistic per portfolio.
    """
    panels = []
    sources = zip(
        [beta_equal_annual, mktcap_equal_annual, bm_equal_annual],
        ['beta', 'MktCap', 'BM'],
    )
    for data, label in sources:
        means = data.mean()
        tstats = pd.Series(nwttest_1samp(data, 0)[0], index=means.index)
        block = pd.concat([means, tstats], axis=1).T
        del block['year']
        block.columns = [1, 2, 3, 4, 5, 6, 7, '7-1']
        block.index = [label, ' ']
        panels.append(block)
    table = pd.concat(panels)
    table = table.applymap(lambda v: round(v, 2))
    print(table)
def beta_mktcap_excess(data):
    """Build a 4-row summary panel per X2 group.

    For each group the panel rows are: mean excess return, its Newey-West
    t-statistic, the four-factor (FFC) alpha, and the alpha's t-statistic,
    one column per portfolio.

    Fix: ``DataFrame.append`` was removed in pandas 2.0 — replaced with
    ``pd.concat``. Also assigns ``MultiIndex.names`` (plural), since setting
    a list on ``.name`` fails on modern pandas.

    :param data: DataFrame whose first two columns are metadata ('year',
        'X2_group'); portfolio-return columns start at column 2.
    :return: DataFrame indexed by (X2_group, Coefficient).

    Note: uses the module-level four-factor DataFrame ``ff``.
    """
    df = pd.DataFrame()
    # NOTE(review): iterates the first five *values* of the column, not the
    # unique labels — confirm data['X2_group'][:5] holds one row per group.
    for i in data['X2_group'][:5]:
        # Collect this group's rows for 1988-2011 in year order.
        yearly = [data[(data['year'] == year) & (data['X2_group'] == i)]
                  for year in range(1988, 2012)]
        x = pd.concat([pd.DataFrame(columns=data.columns)] + yearly,
                      ignore_index=True)
        x.index = x['year']
        x = x[x.columns[2:]]          # keep only the return columns
        x = x.astype(np.float64)
        # Row 1-2: mean excess return and its Newey-West t-statistic.
        mean_row = x.mean()
        tv1 = pd.Series(nwttest_1samp(x, 0)[0], index=x.columns)
        # Rows 3-4: FFC alpha and its t-statistic, using the four-factor
        # table ``ff`` with HAC (Newey-West, 6 lags) standard errors.
        temp_alpha = []
        temp_tv = []
        for j in x.columns:
            temp_data = pd.concat([x[j], ff], axis=1)
            model = smf.ols(str(j) + '~mktrf+smb+hml+umd', temp_data).fit(
                cov_type='HAC', cov_kwds={'maxlags': 6})
            temp_alpha.append(model.params[0])   # intercept = FFC alpha
            temp_tv.append(model.tvalues[0])
        temp_alpha = pd.Series(temp_alpha, index=x.columns)
        temp_tv = pd.Series(temp_tv, index=x.columns)
        # Stack the four summary Series as rows (to_frame().T = one-row frame).
        X = pd.concat(
            [s.to_frame().T for s in (mean_row, tv1, temp_alpha, temp_tv)],
            ignore_index=True,
        )
        X.index = [[i, i, i, i], [
            'Excess_return', 't_value_er', 'FFC_alpha', 't_value_alpha'
        ]]
        X.index.names = ['X2_group', 'Coefficient']
        df = pd.concat([df, X])
    return df
def showtable7():
    """Print average beta/MktCap/BM per group with a 7-1 spread and its t-stat.

    Uses the module-level ``all_beta_group`` DataFrame; groups are relabeled
    1..7 and the spread test is a Newey-West t-test on the yearly 7-1 series.
    """
    data = all_beta_group.copy()
    group_labels = [1, 2, 3, 4, 5, 6, 7]
    # Cross-sectional means per group for each characteristic.
    means = []
    for col in ('beta', 'mktcap', 'bm'):
        avg = data.groupby('group')[col].mean()
        avg.index = group_labels
        means.append(avg)
    # Newey-West t-stat on the yearly top-minus-bottom spread.
    tstats = []
    for col in ('beta', 'mktcap', 'bm'):
        annual = data.groupby(['year', 'group'])[col].mean().reset_index()
        wide = pd.pivot_table(annual, index='year', columns='group')[col].reset_index()
        labels = wide.columns
        wide['diff'] = wide[labels[-1]] - wide[labels[1]]
        tstats.append(nwttest_1samp(wide['diff'], 0)[0])
    spread_t = pd.Series(tstats, name='7-1 t-statistic',
                         index=['beta', 'MktCap', 'BM'])
    table = pd.concat(means, axis=1).T
    table.index = ['beta', 'MktCap', 'BM']
    table['7-1'] = table[7] - table[1]
    table['7-1 t-statistic'] = spread_t
    print(table)
def FC(length,rf, timeseries, lenn=96, na='FC'):
    """Univariate-OLS forecast-combination backtest.

    For each sliding window, fits one univariate linear regression per
    feature, sums slope*feature as the return forecast, forms a decile
    long-short portfolio, then prints performance statistics and FF3/FF5
    alphas and writes the three return series to ..\\output\\<na>.csv.
    """
    #length is the sliding-window length: one of {3,12,24,36}
    #na is the output file name
    #rf is the risk-free rate, matching length: one of {rf3,rf12,rf24,rf36}
    Long_Short = []
    Long = []
    Short = []
    for i in range(len(timeseries) - length):
        print(i)  # progress indicator
        # Stack the training window plus the following test period.
        FINALm = pd.concat(timeseries[i:i + (length +1)], axis=0)
        FINALm = FINALm.fillna(0)
        FINAL_X = FINALm.iloc[:, :-2]  # feature columns (last two are not features)
        FINAL_x = scale(FINAL_X)       # standardize features
        final = pd.concat(timeseries[i:i + length], axis=0)
        x_train = FINAL_x[:len(final)]
        x_test = FINAL_x[len(final):]
        y_train = final.iloc[:, -1]    # target: last column
        test = timeseries[i + length]
        clf = LinearRegression()
        # One univariate regression per feature; keep only the slope.
        # NOTE(review): the inner loops below shadow the outer ``i``; safe
        # only because the outer ``for`` rebinds ``i`` each iteration.
        k = []
        for i in range(lenn):
            x = x_train[:, i].reshape(-1, 1)
            clf.fit(x, y_train)
            k.append(clf.coef_[0])
        # Combined forecast = sum over features of slope * feature value.
        PREDICTION = []
        for i in range(len(x_test)):
            test0 = np.array(x_test[i])
            y = 0
            for j in range(lenn):
                y = y + test0[j] * k[j]
            PREDICTION.append(y)
        y_test = test.iloc[:, -1]
        # Build the portfolio: sort by forecast, take top/bottom decile.
        r_predict = pd.DataFrame(PREDICTION, columns=['predict'])
        r_ture = pd.DataFrame(y_test)   # 'ture' (sic) = realized returns
        r_ture.columns = ['ture']
        r_ture.index = r_predict.index
        FINAL = pd.concat([r_predict, r_ture], axis=1)
        FINAL_sort = FINAL.sort_values(by='predict', axis=0)
        r_final = np.array(FINAL_sort['ture'])
        m = int(len(r_final) * 0.1) + 1  # decile size
        r_final = r_final.tolist()
        long = r_final[-m:]   # highest forecasts
        short = r_final[:m]   # lowest forecasts
        r_end = (np.sum(long) - np.sum(short)) / m
        Long_Short.append(r_end)
        Long.append(np.average(long))
        Short.append(np.average(short))
    # Summary statistics for the long-short, long, and short series.
    T_value = []
    Mean = []
    p_value = []
    sharpratio = []
    Std = []
    TO = [Long_Short, Long, Short]
    for l in TO:
        t_test = nwttest_1samp(l, 0)   # Newey-West one-sample t-test vs 0
        mean = np.average(l) * 12      # annualized mean
        STD = np.std(l) * np.sqrt(12)  # annualized volatility
        # Sharpe ratio; rf appears to be in percent, hence /100 — TODO confirm.
        sharp = (mean - rf.mean().tolist()[0] * 12 / 100) / STD
        T_value.append(t_test.statistic)
        p_value.append(t_test.pvalue)
        Mean.append(mean)
        Std.append(STD)
        sharpratio.append(sharp)
    name = na
    length = length  # no-op; kept as in the original
    print(name, 'long-short', 'long', 'short')
    print('mean', Mean[0] / 12, Mean[1] / 12, Mean[2] / 12)
    print('t-statistic', '(' + str(round(T_value[0], 4)) + ')', '(' + str(round(T_value[1], 4)) + ')', '(' + str(round(T_value[2], 4)) + ')')
    # Persist the three return series.
    A = pd.DataFrame(Long_Short, columns=['long-short'])
    B = pd.DataFrame(Long, columns=['long'])
    C = pd.DataFrame(Short, columns=['short'])
    M = pd.concat([A, B, C], axis=1)
    M.to_csv('..\output\\'+name + '.csv')
    # Alphas relative to the Fama-French 3- and 5-factor models.
    ff3 = pd.read_csv('..\DataBase\\ff3.csv')
    ff5 = pd.read_csv('..\DataBase\\ff5.csv')
    alpha3 = []
    t3 = []
    t5 = []
    alpha5 = []
    for i in range(3):
        X1 = ff3.iloc[length:, 1:]
        X2 = ff5.iloc[length:, 1:]
        # Drop the last two observations — presumably to align the sample
        # with the factor data; TODO confirm.
        Y = M.iloc[:-2, i]
        Y.index = X1.index
        Y = Y - rf.RF[:-1] / 100  # excess returns (rf in percent)
        x1 = sm.add_constant(X1)
        reg = sm.OLS(Y, x1).fit()
        t3.append(reg.tvalues[0])
        alpha3.append(reg.params[0] * 12)  # annualized intercept
        x2 = sm.add_constant(X2)
        reg = sm.OLS(Y, x2).fit()
        t5.append(reg.tvalues[0])
        alpha5.append(reg.params[0] * 12)
    print('alpha-FF3', alpha3[0] / 12, alpha3[1] / 12, alpha3[2] / 12)
    print('t-statistic', '(' + str(round(t3[0], 4)) + ')', '(' + str(round(t3[1], 4)) + ')', '(' + str(round(t3[2], 4)) + ')')
    print('alpha-FF5', alpha5[0] / 12, alpha5[1] / 12, alpha5[2] / 12)
    print('t-statistic', '(' + str(round(t5[0], 4)) + ')', '(' + str(round(t5[1], 4)) + ')', '(' + str(round(t5[2], 4)) + ')')
    print('sharpe', sharpratio[0], sharpratio[1], sharpratio[2])
def comboutput(length, clf, name, rf,timeseries2, index):
    """Backtest a combined model fed both sequential and flat feature sets.

    Prints performance statistics and FF3/FF5 alphas, and writes the three
    return series to ..\\output\\<name>.csv.

    :param length: sliding-window length
    :param clf: model exposing fit(Tx, Ty, Nx, Ny) and
        predict(Tx_test, Nx_test, mask, length) as called below
    :param name: output file name
    :param rf: risk-free rate series matching ``length``
    :param timeseries2: list of per-period DataFrames; 'ret' is the target
    :param index: per-period boolean masks of rows to drop — presumably
        stocks with 'null' returns; TODO confirm against the caller
    """
    Long_Short = []
    Long = []
    Short = []
    for i in range(len(timeseries2) - length):
        print(i)  # progress indicator
        # LSTM data: training window plus the following test period, stacked.
        FINALm = pd.concat(timeseries2[i:(i + length + 1)], axis=0)
        # Fill NaNs only on rows whose return is not the 'null' placeholder.
        FINALm[~FINALm['ret'].isin(['null'])] = FINALm[~FINALm['ret'].isin(['null'])].fillna(0)
        FINAL_X = FINALm.iloc[:, :-2]  # feature columns (last two excluded)
        FINAL_x = FINAL_X
        # Standardize valid rows; zero out 'null' rows entirely.
        FINAL_x[~FINALm['ret'].isin(['null'])] = scale(FINAL_X[~FINALm['ret'].isin(['null'])])
        FINAL_x[FINALm['ret'].isin(['null'])] = 0
        FINALm[FINALm['ret'].isin(['null'])] = 0
        # Split into per-period arrays; 3571 rows per period — presumably the
        # fixed cross-section size; TODO confirm.
        Nx_train = [FINAL_x.iloc[j * 3571:(j + 1) * 3571, :].values for j in range(length)]
        Ny_train = [FINALm.iloc[j * 3571:(j + 1) * 3571, -1].values for j in range(length)]
        Nx_test = [FINAL_x.iloc[j * 3571:(j + 1) * 3571, :].values for j in range(1, length + 1)]
        ## Traditional data: drop masked rows per period, then stack.
        dl = timeseries2[i:i + (length + 1)]
        p = [dl[j][~index[i+j]] for j in range(len(dl))]
        TTX = pd.concat(p, axis=0)
        TTX = TTX.fillna(0)
        TXX = TTX.iloc[:, :-2]
        TXx = scale(TXX)
        final = pd.concat(p[:length], axis=0)
        Tx_train = TXx[:len(final)]
        Tx_test = TXx[len(final):]
        Ty_train = final.iloc[:, -1]
        test = p[-1]
        Ty_test = test.iloc[:, -1]
        # Baseline - linear
        clf = clf  # no-op; kept as in the original
        clf.fit(Tx_train, Ty_train, Nx_train, Ny_train)
        PREDICTION = clf.predict(Tx_test, Nx_test, index[i+length], length)
        # Build the decile long-short portfolio from the forecasts.
        r_predict = pd.DataFrame(PREDICTION, columns=['predict'])
        r_ture = pd.DataFrame(Ty_test)  # 'ture' (sic) = realized returns
        r_ture.columns = ['ture']
        r_ture.index = r_predict.index
        FINAL = pd.concat([r_predict, r_ture], axis=1)
        FINAL_sort = FINAL.sort_values(by='predict', axis=0)
        r_final = np.array(FINAL_sort['ture'])
        m = int(len(r_final) * 0.1) + 1  # decile size
        r_final = r_final.tolist()
        long = r_final[-m:]   # highest forecasts
        short = r_final[:m]   # lowest forecasts
        r_end = (np.sum(long) - np.sum(short)) / m
        Long_Short.append(r_end)
        Long.append(np.average(long))
        Short.append(np.average(short))
        gc.collect()
    # Summary statistics: annualized excess mean, NW t-stat, vol, Sharpe.
    T_value = []
    Mean = []
    p_value = []
    sharpratio = []
    Std = []
    TO = [Long_Short, Long, Short]
    for l in TO:
        t_test = nwttest_1samp(l, 0, L=1)  # Newey-West t-test, 1 lag
        # rf appears to be in percent, hence /100 — TODO confirm.
        mean = np.average(l) * 12 - rf.mean().tolist()[0] * 12 / 100
        STD = np.std(l) * np.sqrt(12)  # annualized volatility
        sharp = (mean) / STD
        T_value.append(t_test.statistic)
        p_value.append(t_test.pvalue)
        Mean.append(mean)
        Std.append(STD)
        sharpratio.append(sharp)
    print(name, 'long-short', 'long', 'short')
    print('mean', Mean[0] / 12, Mean[1] / 12, Mean[2] / 12)
    print('t-statistic', '(' + str(round(T_value[0], 4)) + ')', '(' + str(round(T_value[1], 4)) + ')', '(' + str(round(T_value[2], 4)) + ')')
    # Persist the three return series.
    A = pd.DataFrame(Long_Short, columns=['long-short'])
    B = pd.DataFrame(Long, columns=['long'])
    C = pd.DataFrame(Short, columns=['short'])
    M = pd.concat([A, B, C], axis=1)
    M.to_csv('..\output\\' + name + '.csv')
    # Alphas relative to the Fama-French 3- and 5-factor models.
    ff3 = pd.read_csv('..\DataBase\\ff3.csv')
    ff5 = pd.read_csv('..\DataBase\\ff5.csv')
    alpha3 = []
    t3 = []
    t5 = []
    alpha5 = []
    for i in range(3):
        X1 = ff3.iloc[length:, 1:]
        X2 = ff5.iloc[length:, 1:]
        # Drop the last two observations — presumably to align the sample
        # with the factor data; TODO confirm.
        Y = M.iloc[:-2, i]
        Y.index = X1.index
        Y = Y - rf.RF[:-1] / 100  # excess returns (rf in percent)
        x1 = sm.add_constant(X1)
        reg = sm.OLS(Y, x1).fit()
        t3.append(reg.tvalues[0])
        alpha3.append(reg.params[0] * 12)  # annualized intercept
        x2 = sm.add_constant(X2)
        reg = sm.OLS(Y, x2).fit()
        t5.append(reg.tvalues[0])
        alpha5.append(reg.params[0] * 12)
    print('alpha-FF3', alpha3[0] / 12, alpha3[1] / 12, alpha3[2] / 12)
    print('t-statistic', '(' + str(round(t3[0], 4)) + ')', '(' + str(round(t3[1], 4)) + ')', '(' + str(round(t3[2], 4)) + ')')
    print('alpha-FF5', alpha5[0] / 12, alpha5[1] / 12, alpha5[2] / 12)
    print('t-statistic', '(' + str(round(t5[0], 4)) + ')', '(' + str(round(t5[1], 4)) + ')', '(' + str(round(t5[2], 4)) + ')')
    print('sharpe', sharpratio[0], sharpratio[1], sharpratio[2])
def output2(length,CLF,name,rf,timeseries2):
    """Backtest a sequence model on sliding windows of monthly panels.

    Fits ``CLF`` on the window, forecasts the next period, forms a decile
    long-short portfolio, prints performance and FF3/FF5 alphas, and writes
    the three return series to ..\\output\\<name>.csv.
    """
    # length is the sliding-window length: one of {3,12,24,36}
    # CLF is the machine-learning model used for prediction
    # name is the output file name (type: string)
    # rf is the risk-free rate, matching length: one of {rf3,rf12,rf24,rf36}
    Long_Short = []
    Long = []
    Short = []
    for i in range(len(timeseries2) - length):
        # Stack the training window plus the following test period.
        FINALm = pd.concat(timeseries2[i:(i + length + 1)], axis=0)
        # Fill NaNs only on rows whose return is not the 'null' placeholder.
        FINALm[~FINALm['ret'].isin(['null'])] = FINALm[~FINALm['ret'].isin(['null'])].fillna(0)
        FINAL_X = FINALm.iloc[:, :-2]  # feature columns (last two excluded)
        FINAL_x = FINAL_X
        # Standardize valid rows; zero out 'null' rows entirely.
        FINAL_x[~FINALm['ret'].isin(['null'])] = scale(FINAL_X[~FINALm['ret'].isin(['null'])])
        FINAL_x[FINALm['ret'].isin(['null'])] = 0
        FINALm[FINALm['ret'].isin(['null'])] = 0
        # Split into per-period arrays; 3571 rows per period — presumably the
        # fixed cross-section size; TODO confirm.
        x_train = [FINAL_x.iloc[j * 3571:(j + 1) * 3571, :].values for j in range(length)]
        y_train = [FINALm.iloc[j * 3571:(j + 1) * 3571, -1].values for j in range(length)]
        x_test = np.array([FINAL_x.iloc[j * 3571:(j + 1) * 3571, :].values for j in range(1, length + 1)])
        y_test = list(FINALm.iloc[(length) * 3571:(length + 1) * 3571, -1].values)
        # Baseline - linear
        clf = CLF
        clf.fit(x_train, y_train)
        PREDICTION = clf.predict(x_test)
        # Keep only the forecasts for the final period of the test window.
        PREDICTION = [PREDICTION[m][0] for m in range(3571 * (length - 1), 3571 * length)]
        # Build the portfolio: drop 'null' stocks, sort by forecast, take
        # top/bottom decile.
        r_predict = pd.DataFrame(PREDICTION, columns=['predict'])
        r_ture = pd.DataFrame(y_test)  # 'ture' (sic) = realized returns
        r_ture.columns = ['ture']
        r_ture.index = r_predict.index
        FINAL = pd.concat([r_predict, r_ture], axis=1)
        FINAL = FINAL[~timeseries2[i + length]['ret'].isin(['null'])]
        FINAL_sort = FINAL.sort_values(by='predict', axis=0)
        r_final = np.array(FINAL_sort['ture'])
        m = int(len(r_final) * 0.1) + 1  # decile size
        r_final = r_final.tolist()
        long = r_final[-m:]   # highest forecasts
        short = r_final[:m]   # lowest forecasts
        r_end = (np.sum(long) - np.sum(short)) / m
        Long_Short.append(r_end)
        Long.append(np.average(long))
        Short.append(np.average(short))
        gc.collect()
    # Summary statistics: annualized excess mean, NW t-stat, vol, Sharpe.
    T_value = []
    Mean = []
    p_value = []
    sharpratio = []
    Std = []
    TO = [Long_Short, Long, Short]
    for l in TO:
        t_test = nwttest_1samp(l, 0, L=1)  # Newey-West t-test, 1 lag
        # rf appears to be in percent, hence /100 — TODO confirm.
        mean = np.average(l) * 12- rf.mean().tolist()[0] * 12 / 100
        STD = np.std(l) * np.sqrt(12)  # annualized volatility
        sharp = (mean ) / STD
        T_value.append(t_test.statistic)
        p_value.append(t_test.pvalue)
        Mean.append(mean)
        Std.append(STD)
        sharpratio.append(sharp)
    print(name, 'long-short', 'long', 'short')
    # Report raw monthly means: add the monthly risk-free rate back to the
    # excess mean.
    print('mean', Mean[0] / 12 + rf.mean().tolist()[0] / 100, Mean[1] / 12 + rf.mean().tolist()[0] / 100, Mean[2] / 12 + rf.mean().tolist()[0] / 100)
    print('t-statistic', '('+str(round(T_value[0],4))+')', '('+str(round(T_value[1],4))+')', '('+str(round(T_value[2],4))+')')
    # Persist the three return series.
    A = pd.DataFrame(Long_Short, columns=['long-short'])
    B = pd.DataFrame(Long, columns=['long'])
    C = pd.DataFrame(Short, columns=['short'])
    M = pd.concat([A, B, C], axis=1)
    M.to_csv('..\output\\' + name + '.csv')
    # Alphas relative to the Fama-French 3- and 5-factor models.
    ff3 = pd.read_csv('..\DataBase\\ff3.csv')
    ff5 = pd.read_csv('..\DataBase\\ff5.csv')
    alpha3 = []
    t3 = []
    t5 = []
    alpha5 = []
    for i in range(3):
        X1 = ff3.iloc[length:, 1:]
        X2 = ff5.iloc[length:, 1:]
        # Drop the last two observations — presumably to align the sample
        # with the factor data; TODO confirm.
        Y = M.iloc[:-2, i]
        Y.index = X1.index
        Y = Y - rf.RF[:-1] / 100  # excess returns (rf in percent)
        x1 = sm.add_constant(X1)
        reg = sm.OLS(Y, x1).fit()
        t3.append(reg.tvalues[0])
        alpha3.append(reg.params[0] * 12)  # annualized intercept
        x2 = sm.add_constant(X2)
        reg = sm.OLS(Y, x2).fit()
        t5.append(reg.tvalues[0])
        alpha5.append(reg.params[0] * 12)
    print('alpha-FF3', alpha3[0] / 12, alpha3[1] / 12, alpha3[2] / 12)
    print('t-statistic', '(' + str(round(t3[0], 4)) + ')', '(' + str(round(t3[1], 4)) + ')', '(' + str(round(t3[2], 4)) + ')')
    print('alpha-FF5', alpha5[0] / 12, alpha5[1] / 12, alpha5[2] / 12)
    print('t-statistic', '(' + str(round(t5[0], 4)) + ')', '(' + str(round(t5[1], 4)) + ')', '(' + str(round(t5[2], 4)) + ')')
    print('sharpe', sharpratio[0], sharpratio[1], sharpratio[2])
x.columns = [1, 2, 3, 4, 5, 6, 7, '7-1'] x.index = [namelist[i], ' '] table = pd.concat([table, x]) table = table.applymap(lambda x: round(x, 2)) print(table) showtable8() ###生成表格9 beta_equal_annual = annual_equal_mean(beta_group) beta_equal_annual.index = beta_equal_annual['year'] del beta_equal_annual['year'] #excessreturn部分结果 df1 = beta_equal_annual.mean() df2 = pd.Series(nwttest_1samp(beta_equal_annual, 0)[0], index=df1.index) excess_return = pd.concat([df1, df2], axis=1).T excess_return.index = ['excess return', ' '] excess_return.index.name = None excess_return.columns = [1, 2, 3, 4, 5, 6, 7, 8] #制作capm部分结果 CAPM = pd.DataFrame(index=['alpha', ' ', 'beta_mkt', ' ']) for i in range(8): name = beta_equal_annual.columns data = pd.concat([beta_equal_annual[name[i]], ff['mktrf']], axis=1) capm = smf.ols(name[i] + '~mktrf', data).fit(cov_type='HAC', cov_kwds={'maxlags': 6}) X = [capm.params[0], capm.tvalues[0], capm.params[1], capm.tvalues[1]] X = pd.Series(X, name=i + 1, index=['alpha', ' ', 'beta_mkt', ' ']) CAPM = pd.concat([CAPM, X], axis=1) #制作FF部分结果
't', 'ff30ret', 't' ]) rf = pd.read_csv('RF.csv') for i in range(len(factoreq.columns) - 1): if factoreq.iloc[:, i + 1].mean() > 0: faceq = factoreq.iloc[:, i + 1] else: faceq = -factoreq.iloc[:, i + 1] if factorvw.iloc[:, i + 1].mean() > 0: facvw = factorvw.iloc[:, i + 1] else: facvw = -factorvw.iloc[:, i + 1] ####return reteq0 = faceq.dropna() resultew.iloc[0, i] = reteq0.mean() ttest = nwttest_1samp(reteq0, 0) resultew.iloc[1, i] = ttest.statistic retvw0 = facvw.dropna() resultvw.iloc[0, i] = retvw0.mean() ttest = nwttest_1samp(retvw0, 0) resultvw.iloc[1, i] = ttest.statistic ##excess return exeq = faceq - rf.RF / 100 exvw = facvw - rf.RF / 100 exeq0 = exeq.dropna() resultew.iloc[2, i] = exeq0.mean() #ttest=stats.ttest_1samp(exeq0,0) ttest = nwttest_1samp(exeq0, 0) resultew.iloc[3, i] = ttest.statistic exvw0 = exvw.dropna()