def port_mean(self):
    """Return the expected (mean) return of the portfolio.

    The mean is taken over the series returned by ``self.port_rate()``.
    When ``self.path`` is truthy the value is also written, labelled
    ``'port mean'``, to ``port_mean.csv`` in the ``rate`` sub-folder of
    ``self.path``.

    Returns:
        The mean of the portfolio return series.
    """
    # Evaluate port_rate() exactly once: the original called it twice, which
    # recomputed the whole series (and repeated any file output port_rate
    # performs) only to take the same mean again.
    mean_rate = self.port_rate().mean()
    if self.path:
        makedir(self.path, 'rate')
        pd.Series({'port mean': mean_rate}).to_csv(
            f'{self.path}\\rate\\port_mean.csv')
    return mean_rate
def cov(self):
    """Return the covariance matrix of the risky assets.

    The matrix is the ``'cov'`` entry of ``self.calculate()``. When
    ``self.path`` is truthy it is also saved as ``cov.csv`` in the ``rate``
    sub-folder of ``self.path``.
    """
    covariance = self.calculate()['cov']
    if not self.path:
        return covariance
    makedir(self.path, 'rate')
    covariance.to_csv(f'{self.path}\\rate\\cov.csv')
    return covariance
def __init__(self,
             names=None,
             start_date='2021-05-01',
             end_date='2021-11-01',
             frequency='d',
             rfr=0.023467,
             funds=10000000,
             path='.\\Markovitz cache\\'):
    """Load price data for the given assets and derive per-asset basics.

    Args:
        names: Iterable of asset names. ``None`` selects the built-in
            default list of five stocks.
        start_date: First date requested from the data source.
        end_date: Last date requested from the data source.
        frequency: Sampling frequency key, one of ``'d'``, ``'w'``, ``'m'``.
        rfr: Annual risk-free rate as a fraction; stored in percent per
            period in ``self.rfr``.
        funds: Total funds available, used for ``self.max_shares_dict``.
        path: Cache folder. A falsy value disables the cache lookup.

    Raises:
        KeyError: If ``frequency`` is not one of the supported keys.
    """
    # A list literal as a default is shared across every call; build a fresh
    # list per instance instead.
    if names is None:
        names = ['比亚迪', '阳光电源', '璞泰来', '紫光国微', '盛新锂能']
    self.names = list(names)
    self.lens = len(self.names)
    self.start_date = start_date
    self.end_date = end_date
    self.frequency = frequency
    # Annual rate (in percent) divided by the number of periods per year.
    # NOTE: the original used 30 for 'm', which is not a periods-per-year
    # count; 12 is consistent with 365 days and 52 weeks.
    periods_per_year = {'d': 365, 'w': 52, 'm': 12}
    self.rfr = (rfr * 100) / periods_per_year[frequency]
    self.funds = funds
    self.path = path
    if self.path:
        makedir(self.path, '')

    sprint('Initializing...')
    self.datas = None
    if self.path:
        try:
            self.datas = pd.read_csv(
                f'{self.path}\\stock data\\stocks_data.csv')
        except Exception:
            # Missing or unreadable cache: fall back to fetching the data.
            # (Was a bare ``except:``, which also swallowed
            # KeyboardInterrupt / SystemExit.)
            self.datas = None
    if self.datas is None:
        sd_kwargs = dict(names=self.names,
                         start_date=self.start_date,
                         end_date=self.end_date,
                         frequency=self.frequency)
        # Without a cache folder StockData keeps its own default path,
        # exactly as before.
        if self.path:
            sd_kwargs['path'] = self.path
        self.datas = StockData(**sd_kwargs).stocks_data()

    self.datas.index = self.datas['name']
    self.data = self.datas.reset_index(drop=True)

    unique_dates = self.data.date.unique()
    self.date = [str(day)[:10] for day in unique_dates]
    self.first_date = self.date[0]
    self.last_date = self.date[-1]

    # Opening price on the first date, keyed by asset name.
    first_rows = self.data[self.data.date == unique_dates[0]]
    self.first_price = first_rows[['open', 'name'
                                   ]].set_index('name').to_dict()['open']
    # Closing price on the last date, keyed by asset name.
    last_rows = self.data[self.data.date == unique_dates[-1]]
    self.last_price = last_rows[['close', 'name'
                                 ]].set_index('name').to_dict()['close']
    # Largest whole number of 100-share lots affordable per asset at the
    # last closing price.
    self.max_shares_dict = {
        name: math.floor(self.funds / (price * 100))
        for name, price in self.last_price.items()
    }
def __init__(self,
             names=None,
             start_date='2019-12-01',
             end_date='2020-12-31',
             frequency='d',
             adjustflag='3',
             path='.\\StockData Cache\\'):
    """Store the query parameters, prepare the cache folder and log in.

    Args:
        names: Iterable of stock names. ``None`` selects the built-in
            default pair of stocks.
        start_date: First date of the requested range.
        end_date: Last date of the requested range.
        frequency: Sampling frequency key passed on to later queries.
        adjustflag: Price-adjustment flag; the original comment marks the
            default ``'3'`` as "no adjustment".
        path: Cache folder. A falsy value skips creating it.
    """
    # A list literal as a default is shared across every call; build a fresh
    # list per instance instead.
    if names is None:
        names = ['贵州茅台', '隆基股份']
    self.names = list(names)
    self.start_date = start_date
    self.end_date = end_date
    self.frequency = frequency
    self.adjustflag = adjustflag  # default: no price adjustment
    self.path = path
    if self.path:
        makedir(self.path, '')
    login()
def boundary_scatter_data(self, number=500):
    """Generate minimum-variance portfolios for random target returns.

    For each of ``number`` target returns, drawn uniformly between the
    lowest and highest ``rate`` in the scatter data, the portfolio variance
    is minimised subject to weights summing to 1, weights within [0, 1],
    and the portfolio mean return equalling the target.

    Args:
        number: How many boundary points to generate (default 500).

    Returns:
        DataFrame with columns ``weights``, ``risk``, ``rate`` and
        ``sharpe``, sorted by ``sharpe`` in descending order. It is also
        written to ``boundary_scatter_data.csv`` in the ``scatter data``
        sub-folder when ``self.path`` is truthy.
    """
    df_scatter = None
    if self.path:
        try:
            # ``read_csv`` has no ``index`` keyword: the original call always
            # raised TypeError, which the bare ``except:`` hid, so the cached
            # scatter file was never actually used.
            df_scatter = pd.read_csv(
                f'{self.path}\\scatter data\\scatter_data.csv')
        except Exception:
            # Missing or unreadable cache: regenerate below.
            df_scatter = None
    if df_scatter is None:
        df_scatter = self.scatter_data()

    data_dict = self.calculate()
    data_mean = data_dict['mean']
    data_cov = data_dict['cov']

    # Loop invariants, hoisted out of the optimisation loop.
    rate_low = df_scatter.rate.min()
    rate_high = df_scatter.rate.max()
    start_weights = np.ones(self.lens) / self.lens
    weight_bounds = tuple((0, 1) for _ in range(self.lens))

    scatter_list = []
    sprint('Searching for boundary scatter...')
    for _ in trange(number):
        random_rate = random.uniform(rate_low, rate_high)
        constraints = ({
            'type': 'eq',
            'fun': lambda weights: weights.sum() - 1
        }, {
            'type': 'eq',
            'fun': lambda weights: data_mean.dot(weights.T)['pctChg'] -
            random_rate
        })
        result = sco.minimize(
            fun=lambda weights: weights.dot(data_cov).dot(weights.T),
            x0=start_weights,
            bounds=weight_bounds,
            constraints=constraints)
        # result.fun is the minimised variance; risk is its square root.
        scatter_list.append([result.x, np.sqrt(result.fun), random_rate])

    df_boundary_scatter = pd.DataFrame(scatter_list,
                                       columns=['weights', 'risk', 'rate'])
    df_boundary_scatter['sharpe'] = (df_boundary_scatter.rate -
                                     self.rfr) / df_boundary_scatter.risk
    df_boundary_scatter = df_boundary_scatter.sort_values(by='sharpe',
                                                          ascending=False)
    if self.path:
        makedir(self.path, 'scatter data')
        df_boundary_scatter.to_csv(
            f'{self.path}\\scatter data\\boundary_scatter_data.csv')
    return df_boundary_scatter
def max_loss(self, level=0.05):
    """Return each asset's loss threshold at the given significance level.

    For every name in ``self.names`` the ``level * 100``-th percentile of
    that asset's ``pctChg`` values is taken and its absolute value stored.

    Args:
        level: Significance level as a fraction (default 0.05).

    Returns:
        Series indexed by asset name. It is also written to a CSV in the
        ``ask`` sub-folder when ``self.path`` is truthy.
    """
    returns = self.data[['name', 'date', 'pctChg']]
    losses = {}
    for asset in self.names:
        asset_returns = returns[returns.name == asset]['pctChg']
        losses[asset] = abs(np.percentile(asset_returns, level * 100))
    loss_series = pd.Series(losses)
    if self.path:
        makedir(self.path, 'ask')
        loss_series.to_csv(
            f'{self.path}\\ask\\max_loss(level={level}).csv')
    return loss_series
def port_rate(self):
    """Return the portfolio return series weighted by ``self.weights_dict``.

    Each asset's ``pctChg`` is multiplied by its weight, the weighted
    returns are laid out one column per asset (in ``self.names`` order,
    aligned by position), indexed by ``self.date`` and summed across assets.

    Returns:
        Series of portfolio returns indexed by ``self.date``. It is also
        written to ``port_rate_series.csv`` in the ``rate`` sub-folder when
        ``self.path`` is truthy.
    """
    # Work on an explicit copy: adding columns to a bare slice of self.data
    # triggers pandas' SettingWithCopyWarning.
    weighted = self.data[['name', 'date', 'pctChg']].copy()
    weighted['weights'] = weighted['name'].map(self.weights_dict)
    weighted['weighted_pctChg'] = weighted['weights'] * weighted['pctChg']

    per_asset = pd.DataFrame()
    for name in self.names:
        per_asset[name] = list(
            weighted[weighted.name == name]['weighted_pctChg'])
    per_asset.index = self.date

    # Sum once and reuse for both the CSV and the return value.
    portfolio = per_asset.sum(axis=1)
    if self.path:
        makedir(self.path, 'rate')
        portfolio.to_csv(f'{self.path}\\rate\\port_rate_series.csv')
    return portfolio
def calculate(self):
    """Compute mean returns, covariance matrix and correlation matrix.

    Returns:
        dict with keys ``'mean'`` (one-row DataFrame indexed ``'pctChg'``
        with one column per name in ``self.names``), ``'cov'`` and
        ``'correlation'`` (square DataFrames over ``self.names``). The three
        frames are also written to the ``mean,cov,corr`` sub-folder when
        ``self.path`` is truthy.
    """
    data = self.data[['date', 'name', 'pctChg']]
    # Mean return per asset. Aggregate only ``pctChg``: averaging the whole
    # frame also tries to average ``date``, which raises TypeError for string
    # dates on pandas >= 2.0.
    data_mean = data.groupby('name')[['pctChg']].mean().T[self.names]

    # One column of returns per asset, aligned by position, for the
    # covariance and correlation matrices.
    returns = pd.DataFrame()
    for name in self.names:
        returns[name] = list(data[data['name'] == name]['pctChg'])
    data_cov = returns.cov()
    data_corr = returns.corr()

    if self.path:
        makedir(self.path, 'mean,cov,corr')
        data_mean.T.to_csv(f'{self.path}\\mean,cov,corr\\data_mean.csv')
        data_cov.to_csv(f'{self.path}\\mean,cov,corr\\data_cov.csv')
        data_corr.to_csv(f'{self.path}\\mean,cov,corr\\data_corr.csv')
    return {'mean': data_mean, 'cov': data_cov, 'correlation': data_corr}
def __init__(self,
             names=None,
             start_date='2021-05-01',
             end_date='2021-11-01',
             frequency='w',
             rfr=0.023467,
             market_index='沪深300指数',
             path='.\\CAPM cache\\'):
    """Load return data for the given assets and for the market index.

    Args:
        names: Iterable of asset names. ``None`` selects the built-in
            default list of five stocks.
        start_date: First date requested from the data source.
        end_date: Last date requested from the data source.
        frequency: Sampling frequency key, one of ``'d'``, ``'w'``, ``'m'``.
        rfr: Annual risk-free rate as a fraction; stored in percent per
            period in ``self.rfr``.
        market_index: Name of the market index fetched alongside the assets.
        path: Cache folder. A falsy value disables the cache lookup.

    Raises:
        KeyError: If ``frequency`` is not one of the supported keys.
    """
    # A list literal as a default is shared across every call; build a fresh
    # list per instance instead.
    if names is None:
        names = ['比亚迪', '阳光电源', '璞泰来', '紫光国微', '盛新锂能']
    self.names = list(names)
    self.lens = len(self.names)
    self.start_date = start_date
    self.end_date = end_date
    self.frequency = frequency
    # Annual rate (in percent) divided by the number of periods per year.
    # NOTE: the original used 30 for 'm', which is not a periods-per-year
    # count; 12 is consistent with 365 days and 52 weeks.
    periods_per_year = {'d': 365, 'w': 52, 'm': 12}
    self.rfr = (rfr * 100) / periods_per_year[frequency]
    self.market_index = market_index
    self.path = path
    if self.path:
        makedir(self.path, '')

    sprint('Initializing...')
    self.datas = None
    if self.path:
        try:
            self.datas = pd.read_csv(
                f'{self.path}\\stock data\\stocks_data.csv')
        except Exception:
            # Missing or unreadable cache: fall back to fetching the data.
            # (Was a bare ``except:``, which also swallowed
            # KeyboardInterrupt / SystemExit.)
            self.datas = None
    if self.datas is None:
        # The market index is fetched together with the assets.
        sd_kwargs = dict(names=self.names + [market_index],
                         start_date=self.start_date,
                         end_date=self.end_date,
                         frequency=self.frequency)
        # Without a cache folder StockData keeps its own default path,
        # exactly as before.
        if self.path:
            sd_kwargs['path'] = self.path
        self.datas = StockData(**sd_kwargs).stocks_data()

    # Index by name so assets and the market index can be selected apart.
    self.datas.index = self.datas['name']
    self.data = self.datas.loc[self.names].reset_index(drop=True)
    self.Rm_data = self.datas.loc[self.market_index].reset_index(drop=True)
def init_scatter_data(self):
    """Build risk/return/Sharpe scatter data for the ``init_port`` holdings.

    Each row returned by ``self.init_port()`` is converted from holdings to
    value weights using ``self.last_price``; rows whose total value exceeds
    ``self.funds / 100`` are dropped. Risk, mean return and Sharpe ratio are
    then computed per remaining row.

    Returns:
        DataFrame with columns ``weights`` (string form of the weight
        vector), ``risk``, ``rate`` and ``sharpe``, sorted by ``sharpe`` in
        descending order. It is also written to ``df_init_scatter.csv`` in
        the ``scatter data`` sub-folder when ``self.path`` is truthy.
    """
    # Copy so the frame handed back by init_port() is never modified in place.
    # presumably one row per candidate portfolio with a column of lot counts
    # per asset -- TODO confirm against init_port.
    holdings = self.init_port().copy()
    for name in self.names:
        holdings[name] = holdings[name] * self.last_price[name]
    holdings['sum'] = holdings.sum(axis=1)
    for name in self.names:
        holdings[name] = holdings[name] / holdings['sum']
    # Keep only portfolios that fit within the available funds.
    holdings = holdings[holdings['sum'] <= self.funds / 100]

    # Direct 2-D extraction. The original went through ``T.to_dict()``, which
    # yields a 1-D empty array (and a later crash) when no row survives.
    weights = holdings[self.names].to_numpy(dtype=float)

    data_dict = self.calculate()
    data_mean = data_dict['mean']
    cov_matrix = np.asarray(data_dict['cov'], dtype=float)

    df_init_scatter = pd.DataFrame()
    df_init_scatter['weights'] = pd.Series([str(row) for row in weights])
    # Per-portfolio variance w.C.w^T computed row-wise. The original formed
    # the full N x N product only to read its diagonal.
    variance = (weights.dot(cov_matrix) * weights).sum(axis=1)
    df_init_scatter['risk'] = np.sqrt(variance)
    df_init_scatter['rate'] = data_mean.dot(weights.T).T['pctChg']
    df_init_scatter['sharpe'] = (df_init_scatter.rate -
                                 self.rfr) / df_init_scatter.risk
    df_init_scatter = df_init_scatter.sort_values(by='sharpe',
                                                  ascending=False)
    if self.path:
        makedir(self.path, 'scatter data')
        df_init_scatter.to_csv(
            f'{self.path}\\scatter data\\df_init_scatter.csv')
    return df_init_scatter
def stocks_info(self):
    """Look up the code and IPO date of every stock in ``self.names``.

    Returns a dict keyed by stock name, for example::

        {
            '贵州茅台': {'code': 'sh.600519', 'ipoDate': '2001-08-27'},
            '隆基股份': {'code': 'sh.601012', 'ipoDate': '2012-04-11'},
            ...
        }

    When ``self.path`` is truthy the same information is also saved, one row
    per stock with an added ``name`` column, to ``stocks_info.csv`` in the
    ``stock data`` sub-folder.
    """
    sprint('Loading stocks information...')
    info = {}
    for stock_name in tqdm(self.names):
        basics = get_data(bs.query_stock_basic(code_name=stock_name))
        # Only the first returned row is used for each stock.
        info[stock_name] = {
            'code': basics['code'][0],
            'ipoDate': basics['ipoDate'][0],
        }
    if not self.path:
        return info
    makedir(self.path, 'stock data')
    df_info = pd.DataFrame(info).T
    df_info['name'] = df_info.index
    df_info.to_csv(f'{self.path}\\stock data\\stocks_info.csv',
                   index=False)
    return info
def scl(self, name='', show=True):
    """Plot the security characteristic line of one asset.

    Args:
        name: Asset to plot. If it is not in ``self.names`` a warning is
            printed and a random asset from ``self.names`` is used instead.
        show: When true the figure is displayed; otherwise it is saved as
            an SVG named after the asset in the ``scl`` sub-folder of
            ``self.path``.
    """
    if name not in self.names:
        sprint(f'name参数值未给定,或参数值{name}不在给定风险资产范围内!已重新随机选择一种给定风险资产!',
               color='red')
        name = random.choice(self.names)
    Ri = self.data[self.data.name == name]['pctChg']
    Rm = self.Rm_data['pctChg']
    ls_dict = self.ls_beta(Ri)
    plt.cla()
    # Line with intercept alpha at x = 0 and slope beta.
    plt.axline(xy1=(0, ls_dict['alpha_ols']),
               slope=ls_dict['beta_ols'],
               c='m')
    # The market return goes on x and the asset return on y so the points
    # share the coordinate system of the line Ri = alpha + beta * Rm. The
    # original plotted (Ri, Rm) with swapped axis labels, which does not
    # match that line.
    # NOTE(review): assumes ls_beta regresses the asset on the market -- confirm.
    plt.scatter(Rm, Ri, s=10, marker=".", c='b')
    plt.xlabel(f'{self.market_index}收益率(%)')
    plt.ylabel(f'{name}收益率(%)')
    if show:
        plt.show()
    else:
        makedir(self.path, 'scl')
        plt.savefig(f'{self.path}\\scl\\{name}.svg', format='svg')
def drawdown(self, show=True):
    """Compute the maximum drawdown of the portfolio, index and each asset.

    A wealth index is built from the percentage returns of the portfolio
    (``self.port_rate()``), the market index and every asset in
    ``self.names``; the drawdown is the relative distance of the wealth
    index from its running maximum.

    Args:
        show: When true the three charts (wealth, running maximum,
            drawdown) are displayed; otherwise each is saved as an SVG in
            the ``drawdown`` sub-folder of ``self.path``.

    Returns:
        Series with the minimum (most negative) drawdown per column. It is
        also written to ``max drawdown.csv`` when ``self.path`` is truthy.
    """
    df_rate = pd.DataFrame()
    df_rate['port'] = self.port_rate()
    df_rate[self.market_index] = list(self.Rm_data['pctChg'])
    asset_returns = self.data[['name', 'date', 'pctChg']]
    for asset in self.names:
        df_rate[asset] = list(
            asset_returns[asset_returns.name == asset]['pctChg'])

    # Wealth index: cumulative product of (1 + return), returns in percent.
    wealth = (1 + df_rate / 100).cumprod()
    # Highest wealth reached so far.
    previous_max = wealth.cummax()
    # Relative drop from that running peak.
    draw_down = (wealth - previous_max) / previous_max

    if show:
        for frame in (wealth, previous_max, draw_down):
            frame.plot()
        plt.show()
    else:
        makedir(self.path, 'drawdown')
        targets = (
            (wealth, f'{self.path}\\drawdown\\wealth.svg'),
            (previous_max, f'{self.path}\\drawdown\\previous_max.svg'),
            (draw_down, f'{self.path}\\drawdown\\draw_down.svg'),
        )
        for frame, target in targets:
            frame.plot()
            plt.savefig(target, format='svg')

    max_drawdown = draw_down.min()
    if self.path:
        makedir(self.path, 'drawdown')
        max_drawdown.to_csv(f'{self.path}\\drawdown\\max drawdown.csv')
    return max_drawdown
def scatter_data(self, number=5000):
    """Generate risk/return/Sharpe scatter data for sampled portfolios.

    Args:
        number: How many weight vectors to request from
            ``self.weights(number=...)`` (default 5000).

    Returns:
        DataFrame with columns ``weights`` (string form of each weight
        vector), ``risk``, ``rate`` and ``sharpe``, sorted by ``sharpe`` in
        descending order. It is also written to ``scatter_data.csv`` in the
        ``scatter data`` sub-folder when ``self.path`` is truthy.
    """
    data_dict = self.calculate()
    data_mean = data_dict['mean']
    cov_matrix = np.asarray(data_dict['cov'], dtype=float)
    # assumes a 2-D array with one weight vector per row -- TODO confirm
    # against self.weights.
    weights = self.weights(number=number)

    df_scatter = pd.DataFrame()
    df_scatter['weights'] = pd.Series([str(row) for row in weights])
    # Per-portfolio variance w.C.w^T computed row-wise. The original formed
    # the full N x N product (5000 x 5000 by default) only to read its
    # diagonal.
    variance = (weights.dot(cov_matrix) * weights).sum(axis=1)
    df_scatter['risk'] = np.sqrt(variance)
    df_scatter['rate'] = data_mean.dot(weights.T).T['pctChg']
    df_scatter['sharpe'] = (df_scatter.rate - self.rfr) / df_scatter.risk
    df_scatter = df_scatter.sort_values(by='sharpe', ascending=False)
    if self.path:
        makedir(self.path, 'scatter data')
        df_scatter.to_csv(f'{self.path}\\scatter data\\scatter_data.csv')
    return df_scatter
def kline(self):
    """Render a candlestick dashboard to an HTML file.

    The data come from ``self.calculate().to_dict('list')``; the keys read
    here are ``date``, ``datas``, ``volume``, ``MACD``, ``DIFF`` and ``DEA``.
    Three stacked panels are built: candlesticks overlaid with MA5 / MA10 /
    MA20 / MA30 lines (from ``self.calculate_ma``), a volume bar chart, and
    a MACD bar chart overlaid with DIFF and DEA lines. The result is written
    to the ``kline`` sub-folder of ``self.path``, in a file named after
    ``self.names`` and the current local date. Nothing is returned.
    """
    data = self.calculate().to_dict('list')
    # Candlestick series (note: this local name shadows the method name
    # inside the function body).
    kline = (
        Kline().add_xaxis(xaxis_data=data["date"]).add_yaxis(
            series_name="portfolio index",
            y_axis=data["datas"],
            itemstyle_opts=opts.ItemStyleOpts(
                color="#ef232a",
                color0="#14b143",
                border_color="#ef232a",
                border_color0="#14b143",
            ),
        ).set_global_opts(
            xaxis_opts=opts.AxisOpts(
                type_="category",
                is_scale=True,
                boundary_gap=False,
                axisline_opts=opts.AxisLineOpts(is_on_zero=False),
                splitline_opts=opts.SplitLineOpts(is_show=False),
                split_number=20,
                min_="dataMin",
                max_="dataMax",
            ),
            yaxis_opts=opts.AxisOpts(
                is_scale=True,
                splitline_opts=opts.SplitLineOpts(is_show=True)),
            # Three zoom controls; only the second one is visible.
            datazoom_opts=[
                opts.DataZoomOpts(is_show=False,
                                  type_="inside",
                                  xaxis_index=[0, 0],
                                  range_end=100),
                opts.DataZoomOpts(is_show=True,
                                  xaxis_index=[0, 1],
                                  pos_top="97%",
                                  range_end=100),
                opts.DataZoomOpts(is_show=False,
                                  xaxis_index=[0, 2],
                                  range_end=100),
            ],
            tooltip_opts=opts.TooltipOpts(
                trigger="axis",
                axis_pointer_type="cross",
                background_color="rgba(245, 245, 245, 0.8)",
                border_width=1,
                border_color="#ccc",
                textstyle_opts=opts.TextStyleOpts(color="#000"),
            ),
            brush_opts=opts.BrushOpts(
                x_axis_index="all",
                brush_link="all",
                out_of_brush={"colorAlpha": 0.1},
                brush_type="lineX",
            ),
            # Link the axis pointers of all three charts together.
            axispointer_opts=opts.AxisPointerOpts(
                is_show=True,
                link=[{
                    "xAxisIndex": "all"
                }],
                label=opts.LabelOpts(background_color="#777"),
            ),
        ))
    # Moving-average lines drawn on top of the candlesticks.
    kline_line = (Line().add_xaxis(xaxis_data=data["date"]).add_yaxis(
        series_name="MA5",
        y_axis=self.calculate_ma(day_count=5, data=data),
        is_smooth=True,
        linestyle_opts=opts.LineStyleOpts(opacity=0.5),
        label_opts=opts.LabelOpts(is_show=False),
    ).add_yaxis(
        series_name="MA10",
        y_axis=self.calculate_ma(day_count=10, data=data),
        is_smooth=True,
        linestyle_opts=opts.LineStyleOpts(opacity=0.5),
        label_opts=opts.LabelOpts(is_show=False),
    ).add_yaxis(
        series_name="MA20",
        y_axis=self.calculate_ma(day_count=20, data=data),
        is_smooth=True,
        linestyle_opts=opts.LineStyleOpts(opacity=0.5),
        label_opts=opts.LabelOpts(is_show=False),
    ).add_yaxis(
        series_name="MA30",
        y_axis=self.calculate_ma(day_count=30, data=data),
        is_smooth=True,
        linestyle_opts=opts.LineStyleOpts(opacity=0.5),
        label_opts=opts.LabelOpts(is_show=False),
    ).set_global_opts(
        xaxis_opts=opts.AxisOpts(
            type_="category",
            grid_index=1,
            axislabel_opts=opts.LabelOpts(is_show=False),
        ),
        yaxis_opts=opts.AxisOpts(
            grid_index=1,
            split_number=3,
            axisline_opts=opts.AxisLineOpts(is_on_zero=False),
            axistick_opts=opts.AxisTickOpts(is_show=False),
            splitline_opts=opts.SplitLineOpts(is_show=False),
            axislabel_opts=opts.LabelOpts(is_show=True),
        ),
    ))
    # Overlap Kline + Line
    overlap_kline_line = kline.overlap(kline_line)
    # Bar-1: volume bars. The colour callback compares elements [1] and [0]
    # of the matching row of the JS global ``barData`` (injected further
    # down from data["datas"]).
    # presumably those two elements are close and open -- verify against
    # the layout produced by calculate().
    bar_1 = (Bar().add_xaxis(xaxis_data=data["date"]).add_yaxis(
        series_name="Volumn",
        y_axis=data["volume"],
        xaxis_index=1,
        yaxis_index=1,
        label_opts=opts.LabelOpts(is_show=False),
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(""" function(params) { var colorList; if (barData[params.dataIndex][1] > barData[params.dataIndex][0]) { colorList = '#ef232a'; } else { colorList = '#14b143'; } return colorList; } """)),
    ).set_global_opts(
        xaxis_opts=opts.AxisOpts(
            type_="category",
            grid_index=1,
            axislabel_opts=opts.LabelOpts(is_show=False),
        ),
        legend_opts=opts.LegendOpts(is_show=False),
    ))
    # Bar-2 (Overlap Bar + Line): MACD bars, coloured by the sign of the value.
    bar_2 = (Bar().add_xaxis(xaxis_data=data["date"]).add_yaxis(
        series_name="MACD",
        y_axis=data["MACD"],
        xaxis_index=2,
        yaxis_index=2,
        label_opts=opts.LabelOpts(is_show=False),
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(""" function(params) { var colorList; if (params.data >= 0) { colorList = '#ef232a'; } else { colorList = '#14b143'; } return colorList; } """)),
    ).set_global_opts(
        xaxis_opts=opts.AxisOpts(
            type_="category",
            grid_index=2,
            axislabel_opts=opts.LabelOpts(is_show=False),
        ),
        yaxis_opts=opts.AxisOpts(
            grid_index=2,
            split_number=4,
            axisline_opts=opts.AxisLineOpts(is_on_zero=False),
            axistick_opts=opts.AxisTickOpts(is_show=False),
            splitline_opts=opts.SplitLineOpts(is_show=False),
            axislabel_opts=opts.LabelOpts(is_show=True),
        ),
        legend_opts=opts.LegendOpts(is_show=False),
    ))
    # DIFF and DEA lines drawn over the MACD bars.
    line_2 = (Line().add_xaxis(xaxis_data=data["date"]).add_yaxis(
        series_name="DIFF",
        y_axis=data["DIFF"],
        xaxis_index=2,
        yaxis_index=2,
        label_opts=opts.LabelOpts(is_show=False),
    ).add_yaxis(
        series_name="DEA",
        y_axis=data["DEA"],
        xaxis_index=2,
        yaxis_index=2,
        label_opts=opts.LabelOpts(is_show=False),
    ).set_global_opts(legend_opts=opts.LegendOpts(is_show=False)))
    # Bottom panel: MACD bar chart overlaid with the DIFF / DEA line chart.
    overlap_bar_line = bar_2.overlap(line_2)
    # Final Grid that stacks the three panels.
    grid_chart = Grid(
        init_opts=opts.InitOpts(width="1500px", height="750px"))
    # Writes data["datas"] into the HTML as the JS global ``barData`` so the
    # volume colour callback can read it; the author had not found a way to
    # pass values across series, and the demo code also uses a global.
    grid_chart.add_js_funcs("var barData = {}".format(data["datas"]))
    # Candlestick chart with the moving-average lines
    grid_chart.add(
        overlap_kline_line,
        grid_opts=opts.GridOpts(pos_left="3%", pos_right="1%", height="60%"),
    )
    # Volume bar chart
    grid_chart.add(
        bar_1,
        grid_opts=opts.GridOpts(pos_left="3%",
                                pos_right="1%",
                                pos_top="71%",
                                height="10%"),
    )
    # MACD, DIFF and DEA
    grid_chart.add(
        overlap_bar_line,
        grid_opts=opts.GridOpts(pos_left="3%",
                                pos_right="1%",
                                pos_top="82%",
                                height="14%"),
    )
    makedir(self.path, 'kline')
    # Output file name: str(self.names) followed by the current local date.
    grid_chart.render(
        f"{self.path}\\kline\\{str(self.names)}_{time.strftime('%Y-%m-%d', time.localtime())}.html"
    )