def plotHPE(period='M', ratio='PE'):
    """Plot HPE data for every stock listed in the stock basics.

    period : sampling period forwarded to plot_HPE, e.g. 'M'.
    ratio  : ratio type forwarded to plot_HPE, e.g. 'PE'.

    Aborts via SystemExit when the stock basics are not available.
    """
    basics = loadStockBasics()
    if u.isNoneOrEmpty(basics):
        print('Need to have stock basics!')
        raise SystemExit
    # Walk the basics table row by row and plot each stock's HPE data.
    for row in range(len(basics)):
        plot_HPE(stock_id=u.stockID(basics.loc[row, 'code']),
                 period=period, ratio=ratio)
def updateFinanceSummary(force_update=True):
    """Refresh the finance-summary data file of every stock in the basics.

    force_update : when True, re-fetch even if a valid file already exists
                   (forwarded to validFinanceSummary).

    Aborts via SystemExit when the stock basics are not available.
    """
    # Check pre-requisite
    basics = loadStockBasics()
    if u.isNoneOrEmpty(basics):
        print('Need to have stock basics!')
        raise SystemExit
    # Iterate over all stocks
    basics_number = len(basics)
    for i in range(basics_number):
        stock_id = u.stockID(basics.loc[i, 'code'])
        # Check if valid data file already exists
        if not validFinanceSummary(stock_id, force_update):
            getFinanceSummary(stock_id)
            # NOTE(review): source formatting was mangled; this print may
            # belong one level out (logged for every stock, not only the
            # updated ones) — confirm against the original file.
            print('Update Finance Summary:', stock_id)
def lshqCompare():
    """Compare each stock's LSHQ trading file between src_path and tar_path.

    Collects the stock IDs whose source/target files differ (per
    fileCompare) and prints the resulting list.
    Aborts via SystemExit when the stock basics are not available.
    """
    basics = loadStockBasics()
    if u.isNoneOrEmpty(basics):
        print('Need to have stock basics!')
        raise SystemExit
    unmatched = []
    for row in range(len(basics)):
        stock_id = u.stockID(basics.loc[row, 'code'])
        file = c.file_trading_lshq % u.stockFileName(stock_id, False)
        # Record every stock whose two copies do not match.
        if not fileCompare(src_path + file, tar_path + file):
            unmatched.append(stock_id)
    # Report all unmatched stocks at once.
    print(unmatched)
def updatePriceStock(incremental=False):
    """Download daily HFQ price history for every stock in the basics.

    incremental : forwarded to getDailyHFQ; when True only the missing
                  tail of the history is fetched.

    Aborts via SystemExit when the stock basics are not available.
    NOTE(review): relies on a module-level date_end — confirm it is set
    before this is called.
    """
    basics = loadStockBasics()
    if u.isNoneOrEmpty(basics):
        print('Need to have stock basics!')
        raise SystemExit
    for row in range(len(basics)):
        stock_id = u.stockID(basics.loc[row, 'code'])
        listed = u.dateFromStr(basics.loc[row, 'timeToMarket'])
        # Fetch from the listing date up to the module-level date_end.
        getDailyHFQ(stock_id=stock_id, is_index=False, date_start=listed,
                    date_end=date_end, time_to_market=listed,
                    incremental=incremental)
        print('Update Price:', stock_id)
def updatePriceCXG(incremental=False):
    """Download daily HFQ price history for every CXG (new-listing) stock.

    incremental : forwarded to getDailyHFQ; when True only the missing
                  tail of the history is fetched.

    Aborts via SystemExit when the CXG data is not available.
    NOTE(review): relies on a module-level date_end — confirm it is set
    before this is called.
    """
    # Check pre-requisite
    cxg = loadCXG()
    if u.isNoneOrEmpty(cxg):
        print('Need to have CXG data!')
        raise SystemExit
    # Iterate over all CXG stocks
    cxg_number = len(cxg)
    print('Number of CXG:', cxg_number)
    for i in range(cxg_number):
        # Fix: the original read 'code' via DataFrame.ix (removed in
        # pandas 1.0) while reading 'timeToMarket' via .loc on the same
        # row — unified on .loc, consistent with updatePriceStock.
        stock_id = u.stockID(cxg.loc[i, 'code'])
        time_to_market = u.dateFromStr(cxg.loc[i, 'timeToMarket'])
        getDailyHFQ(stock_id=stock_id, is_index=False,
                    date_start=time_to_market, date_end=date_end,
                    time_to_market=time_to_market, incremental=incremental)
        print('Update Price:', stock_id)
def generateIndex(index_name, base_date, base_point, weight_method, benchmark_id):
    """Refresh all LSHQ data needed by a custom index, then generate it.

    index_name    : component-list name, e.g. 'FeiYan_NewEnergyVehicle'
    base_date     : index base date, e.g. '2016-12-30'
    base_point    : index base point, e.g. 1000.0
    weight_method : weighting scheme, forwarded to generate_index
    benchmark_id  : benchmark index code, e.g. '000300'

    Aborts via SystemExit when the component list is not available.
    """
    # Load Index Component Stocks
    component = load_component(index_name)
    if u.isNoneOrEmpty(component):
        print('Index Component Not Available:', index_name)
        raise SystemExit
    if gs.is_debug:
        print(component.head(10))
    # Update Benchmark Index LSHQ to Latest
    date_start = u.dateFromStr(base_date)
    date_end = u.today()
    getDailyHFQ(stock_id=benchmark_id, is_index=True, date_start=date_start,
                date_end=date_end, time_to_market=None, incremental=True)
    print('Update Price:', benchmark_id)
    # Update Component Stock LSHQ to Latest
    component_number = len(component)
    for i in range(component_number):
        # Fix: .iloc keeps the positional row access of the removed
        # DataFrame.ix regardless of the frame's index type.
        stock_id = u.stockID(component.iloc[i]['code'])
        getDailyHFQ(stock_id=stock_id, is_index=False, date_start=date_start,
                    date_end=date_end, time_to_market=None, incremental=True)
        print('Update Price:', stock_id)
    # Generate Index
    generate_index(index_name, base_date, base_point, weight_method, benchmark_id)
def strategyCXG(hc_segments=5, yk_segments=10):
    '''
    Run the CXG (new-listing) strategy: select stocks that keep making
    new highs after their initial limit-up streak breaks ("KaiBan").

    Parameters
    --------
    hc_segments, number of drawdown ("huiche") buckets for back test 1
    yk_segments, number of profit/loss ("yingkui") buckets for back test 2

    Output (saved to CSV)
    --------
    DataFrame with, per stock:
    code, name, industry, area, timeToMarket,
    ss_price, listing price
    kb_price, price when the limit-up streak broke (KaiBan)
    zx_price, latest price
    ss_ratio, gain since listing
    kb_ratio, gain since KaiBan
    new_high, whether a new high was reached after KaiBan

    Assumes the CXG data file and the corresponding daily HFQ data files
    are already up to date.

    NOTE(review): reconstructed from whitespace-mangled source; the
    nesting of the inner bucket loops follows variable usage (they read
    the enclosing loop index) — confirm against the original file.
    '''
    # Load CXG Data
    cxg = loadCXG()
    cxg_number = len(cxg)
    # Init New Columns
    for column in [
            'ss_price', 'kb_price', 'zx_price', 'ss_ratio', 'kb_ratio',
            'kb_index'
    ]:
        cxg[column] = 0
    for column in ['kb', 'high_than_kb', 'new_high']:
        cxg[column] = False
    # Setup Back Test Parameters
    # Group 1: for each given drawdown ratio (relative to the KaiBan price),
    # record the best gain achievable before that drawdown is exceeded.
    hc_price_columns = []
    hc_ratio_columns = []
    hc_index_columns = []
    hc_ratios = []
    for j in range(hc_segments):
        hc_price_columns.append('hc_%d_price' % (10 * (j + 1)))
        hc_ratio_columns.append('hc_%d_ratio' % (10 * (j + 1)))
        hc_index_columns.append('hc_%d_index' % (10 * (j + 1)))
        hc_ratios.append(0.1 * (j + 1))
    hc_price_columns.append('hc_high_price')
    hc_ratio_columns.append('hc_high_ratio')
    hc_index_columns.append('hc_high_index')
    hc_price_columns.append('hc_low_price')
    hc_ratio_columns.append('hc_low_ratio')
    hc_index_columns.append('hc_low_index')
    for column in hc_price_columns:
        cxg[column] = 0
    for column in hc_ratio_columns:
        cxg[column] = 0
    for column in hc_index_columns:
        cxg[column] = 0
    print(hc_price_columns)
    print(hc_ratio_columns)
    print(hc_index_columns)
    print(hc_ratios)
    # Group 2: for each take-profit/stop-loss band, record when (and in
    # which direction) the band is first touched.
    yk_columns = []
    yk_ratios = []
    for j in range(yk_segments):
        ratio = float(j + 1) / float(yk_segments)
        yk_columns.append('yk_%.1f%%' % (100.0 * ratio))
        yk_ratios.append(ratio)
    for column in yk_columns:
        cxg[column] = 0
    print(yk_columns)
    print(yk_ratios)
    #
    # Iterate Over Each CXG Stock Data - Find KaiBan and Back Test
    #
    kb_type = 'close'
    for i in range(cxg_number):
        stock_id = u.stockID(cxg.ix[i, 'code'])
        # Load Stock Daily QFQ Data
        lshq = loadDailyQFQ(stock_id, False)
        if u.isNoneOrEmpty(lshq):
            continue
        else:
            lshq.set_index('date', inplace=True)
            lshq.sort_index(ascending=True, inplace=True)
            cxg.ix[i, 'ss_price'] = lshq.ix[0, 'open']
            cxg.ix[i, 'zx_price'] = lshq.ix[-1, 'close']
            cxg.ix[i, 'ss_ratio'] = cxg.ix[i, 'zx_price'] / cxg.ix[i, 'ss_price'] - 1
            # Whether Reach New High After KaiBan
            lshq_number = len(lshq)
            kb_price = lshq.ix[0, kb_type]
            kb_index = 0
            # Find KaiBan Price and Index: the first day whose high differs
            # from its low, i.e. the stock was no longer locked at limit-up.
            for j in range(1, lshq_number):
                if lshq.ix[j, 'high'] > lshq.ix[j, 'low']:
                    kb_price = lshq.ix[j, kb_type]
                    kb_index = j
                    break
            if kb_index == 0:
                # Still locked at limit-up on every bar so far
                cxg.ix[i, 'kb'] = False
                cxg.ix[i, 'kb_index'] = lshq_number  # number of consecutive limit-up days
                cxg.ix[i, 'high_than_kb'] = 'No'
                cxg.ix[i, 'new_high'] = 'No'
            else:
                cxg.ix[i, 'kb'] = True
                ls_high = kb_price
                for j in range(kb_index, lshq_number):
                    ls_high = lshq.ix[j, 'close'] if lshq.ix[j, 'close'] > ls_high else ls_high
                cxg.ix[i, 'kb_price'] = kb_price
                cxg.ix[i, 'kb_index'] = kb_index  # consecutive limit-up days, also the KaiBan index
                cxg.ix[i, 'kb_ratio'] = cxg.ix[i, 'zx_price'] / cxg.ix[i, 'kb_price'] - 1
                cxg.ix[i, 'high_than_kb'] = 'Yes' if cxg.ix[i, 'zx_price'] >= kb_price else 'No'
                cxg.ix[i, 'new_high'] = 'Yes' if cxg.ix[i, 'zx_price'] == ls_high else 'No'
        # Only back test stocks that have already KaiBan
        if cxg.ix[i, 'kb'] == False:
            continue
        # Back Test - Group 1
        high_since_kb = kb_price
        low_since_kb = kb_price
        high_index = kb_index
        low_index = kb_index
        for j in range(kb_index + 1, lshq_number):
            close = lshq.ix[j, 'close']
            if close > high_since_kb:
                high_since_kb = close
                high_index = j
            if close < low_since_kb:
                low_since_kb = close
                low_index = j
            high_ratio = high_since_kb / kb_price - 1
            low_ratio = 1 - low_since_kb / kb_price
            # Update high_since_kb for each bucket
            for k in range(hc_segments):
                hc_ratio = hc_ratios[k]
                if j == kb_index + 1 or low_ratio <= hc_ratio:  # First bar after KaiBan may exceed threshold
                    cxg.ix[i, hc_price_columns[k]] = high_since_kb  # 'hc_%d_price'
                    cxg.ix[i, hc_ratio_columns[k]] = high_ratio  # 'hc_%d_ratio'
                    cxg.ix[i, hc_index_columns[k]] = high_index  # 'hc_%d_index'
        # Update high_since_kb
        cxg.ix[i, hc_price_columns[-2]] = high_since_kb
        cxg.ix[i, hc_ratio_columns[-2]] = high_since_kb / kb_price - 1
        cxg.ix[i, hc_index_columns[-2]] = high_index
        # Update low_since_kb
        cxg.ix[i, hc_price_columns[-1]] = low_since_kb
        cxg.ix[i, hc_ratio_columns[-1]] = 1 - low_since_kb / kb_price
        cxg.ix[i, hc_index_columns[-1]] = low_index
        # Back Test - Group 2
        high_since_kb = kb_price
        low_since_kb = kb_price
        high_index = kb_index
        low_index = kb_index
        for j in range(kb_index + 1, lshq_number):
            close = lshq.ix[j, 'close']
            # Sentinels: 2.0 / 2.0 / 0 mean "no new extreme on this bar",
            # so no yk band can match below.
            ratio_prev = 2.0
            ratio = 2.0
            value = 0
            if close > high_since_kb:  # close makes a new post-KaiBan high
                ratio_prev = high_since_kb / kb_price - 1
                ratio = close / kb_price - 1
                value = 1 * j  # positive: band crossed on the upside at bar j
                high_since_kb = close
            elif close < low_since_kb:  # close makes a new post-KaiBan low
                ratio_prev = 1 - low_since_kb / kb_price
                ratio = 1 - close / kb_price
                value = -1 * j  # negative: band crossed on the downside at bar j
                low_since_kb = close
            # Fill Corresponding yk_segments
            for k in range(yk_segments):
                yk_ratio = yk_ratios[k]
                if cxg.ix[i, yk_columns[k]] == 0:  # This range has NOT been touched before
                    if yk_ratio > ratio_prev and yk_ratio <= ratio:  # For those ranges contain yk_ratio
                        cxg.ix[i, yk_columns[k]] = value
    # Format Data Frame: round every numeric result column to 3 decimals.
    jg_columns = ['ss_price', 'kb_price', 'zx_price', 'ss_ratio', 'kb_ratio']
    data_columns = [
        jg_columns, hc_price_columns, hc_ratio_columns, hc_index_columns,
        yk_columns
    ]
    for column in [x for j in data_columns for x in j]:
        cxg[column] = cxg[column].map(lambda x: '%.3f' % x)
        cxg[column] = cxg[column].astype(float)
    cxg.set_index('code', inplace=True)
    # Save to CSV File
    file_postfix = 'CXG'
    u.to_csv(cxg, c.path_dict['strategy'],
             c.file_dict['strategy_r'] % file_postfix)
    # Statistics for Back Test - Group 2
    stats_indexs = yk_columns
    stats_indexs_number = len(stats_indexs)
    stats_columns = [
        'ratio', 'win_count', 'win_mean', 'win_std', 'win_min', 'win_25%',
        'win_50%', 'win_75%', 'win_max', 'lose_count', 'lose_mean',
        'lose_std', 'lose_min', 'lose_25%', 'lose_50%', 'lose_75%', 'lose_max'
    ]
    stats_columns_number = len(stats_columns)
    # Allocate the stats frame pre-filled with NaN.
    data_init = np.random.randn(stats_indexs_number * stats_columns_number)
    for i in range(stats_indexs_number * stats_columns_number):
        data_init[i] = np.nan
    stats = pd.DataFrame(data_init.reshape(stats_indexs_number,
                                           stats_columns_number),
                         index=stats_indexs, columns=stats_columns)
    for r in range(yk_segments):
        win_days = []
        lose_days = []
        for i in range(cxg_number):
            value = cxg.ix[cxg.index[i], yk_columns[r]]
            if value == 0:  # No data
                continue
            elif value > 0:
                win_days.append(value)
            else:
                lose_days.append(-value)
        stats_win_days = pd.Series(win_days).describe()
        stats_lose_days = pd.Series(lose_days).describe()
        # describe() yields count/mean/std/min/25%/50%/75%/max, in order.
        for j in range(8):
            stats.iloc[r, 1 + j] = stats_win_days.iloc[j]
            stats.iloc[r, 9 + j] = stats_lose_days.iloc[j]
        # ratio = win_count / (win_count + lose_count)
        stats.iloc[r, 0] = float(stats.iloc[r, 1]) / (float(stats.iloc[r, 1]) +
                                                      float(stats.iloc[r, 9]))
    # Save Statistics to CSV File
    file_postfix = 'CXG_Stats'
    u.to_csv(stats, c.path_dict['strategy'],
             c.file_dict['strategy_r'] % file_postfix)
def updateSamplePrice(benchmark_id, stock_ids, is_index, period):
    '''
    Sample the close prices of the given stocks/indexes at the given
    period, over the date range implied by the benchmark index.

    Parameters
    --------
    benchmark_id : string, benchmark index code e.g. '000300';
                   its history implies the start/end dates.
    stock_ids : list of stock/index codes to sample.
    is_index : bool, whether the entries of stock_ids are indexes.
    period : string, sampling period e.g. 'M'; supports 'D', 'W' and 'M'.

    Return
    --------
    On success, allprice : pandas DataFrame with the sampled close prices
    (the original docstring said Series, but the code returns a concat of
    DataFrames).
    On failure, None.
    '''
    # Load Benchmark
    benchmark = loadDailyQFQ(benchmark_id, True)
    if u.isNoneOrEmpty(benchmark):
        print('Require Benchmark LSHQ File: %s!' %
              u.stockFileName(benchmark_id, True))
        return None
    # Resample Benchmark
    benchmark['date'] = benchmark['date'].astype(np.datetime64)
    benchmark.set_index('date', inplace=True)
    benchmark.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(benchmark.head(10))
    # Only the close price is sampled; drop all the other data columns.
    drop_columns = ['open', 'high', 'low', 'volume', 'amount']
    benchmark.drop(drop_columns, axis=1, inplace=True)
    bench_resample = pd.DataFrame()
    if period == 'D':
        # Daily period: no resampling needed (bench_resample aliases benchmark).
        bench_resample = benchmark
    else:
        bench_resample = benchmark.resample(period).first()
        # Close of a period is the last close within it.
        bench_resample['close'] = benchmark['close'].resample(period).last()
    # Resample daily data by weekly may introduce N/A price (due to holiday weeks)
    # This does not exist for monthly resample (as no holiday month so far)
    if period == 'W':
        bench_resample.dropna(axis=0, how='any', inplace=True)
    # Round close to 3 decimals (via string round-trip).
    bench_resample['close'] = bench_resample['close'].map(lambda x: '%.3f' % x)
    bench_resample['close'] = bench_resample['close'].astype(float)
    stock_list = [bench_resample]
    # Iterate over all stocks
    stocks_number = len(stock_ids)
    for i in range(stocks_number):
        # Load Stock LSHQ
        stock_id = u.stockID(stock_ids[i])
        stock = loadDailyQFQ(stock_id, is_index)
        if u.isNoneOrEmpty(stock):
            print('Require Stock/Index LSHQ File: %s!' %
                  u.stockFileName(stock_id, is_index))
            continue
        stock['date'] = stock['date'].astype(np.datetime64)
        stock.set_index('date', inplace=True)
        stock.sort_index(ascending=True, inplace=True)
        if gs.is_debug:
            print(stock.head(10))
        # Resample Stock LSHQ (same scheme as the benchmark above)
        stock.drop(drop_columns, axis=1, inplace=True)
        if period == 'D':
            stock_resample = stock
        else:
            stock_resample = stock.resample(period).first()
            stock_resample['close'] = stock['close'].resample(period).last()
        stock_resample['close'] = stock_resample['close'].map(
            lambda x: '%.3f' % x)
        stock_resample['close'] = stock_resample['close'].astype(float)
        # Merge Benchmark with Stock; the stock close gets suffix '_<id>'.
        df = pd.merge(bench_resample, stock_resample, how='left',
                      left_index=True, right_index=True, sort=True,
                      suffixes=('', '_' + stock_id))
        # Drop the benchmark's own close copy; keep only 'close_<id>'.
        df.drop(['close'], axis=1, inplace=True)
        stock_list.append(df)
    # Merge Results: inner join keeps the dates present in every frame.
    allprice = pd.concat(stock_list, axis=1, join='inner')
    return allprice
# NOTE(review): this span appears to be the tail of a larger coefficient-
# strategy function whose definition is outside this chunk; names such as
# analyze_strategy, date_start, date_end, period, ratio_method, target_name,
# benchmark_id, completeness_threshold, top_number and benchmark_name must
# be defined upstream — confirm against the full file.
common_postfix = '_'.join(['Coefficient', date_start, date_end, period,
                           ratio_method, target_name, 'vs', benchmark_id])
if analyze_strategy:
    analyzeCoefficient(common_postfix, completeness_threshold, top_number)
# Plot Strategy Results
plot_strategy = False  # hard-coded off: the plotting branch below is disabled
if plot_strategy:
    path = c.path_dict['strategy']
    file = c.file_dict['strategy'] % '_'.join(
        ['Common', 'AllPrice', benchmark_id, period, 'AllStock'])
    price_allstock = u.read_csv(path + file)
    file = c.file_dict['strategy'] % '_'.join(
        ['Common', 'AllPrice', benchmark_id, period, 'AllIndex'])
    price_allindex = u.read_csv(path + file)
    # Generate Statistics List: 3 classifications x 3 coefficients
    statistics = []
    for coefficient in ['Correlation', 'Beta', 'Alpha']:
        for classification in ['Positive', 'Zero', 'Negative']:
            statistics.append(classification + coefficient)
    print(statistics)
    # Plot Statistics List
    for stats in statistics:
        # Plot statistics
        file = c.file_dict['strategy'] % '_'.join(
            [common_postfix, completeness_threshold, stats])
        data = u.read_csv(path + file)
        postfix = '_'.join(['Coefficient', date_start, date_end, period,
                            completeness_threshold])
        plotCoefficient(data['code'], price_allstock, postfix, stats,
                        benchmark_name)
        # Plot single stock within each statistics
        for i in range(len(data)):
            stock_id = u.stockID(data.ix[i, 'code'])
            plotCoefficient([stock_id], price_allstock, postfix,
                            'Positive_Correlation_' + stock_id,
                            benchmark_name)
def generate_index(index_name, base_date, base_point, weight_method, benchmark_id):
    """Compute a custom index from its component stocks and compare it
    against a benchmark index.

    Parameters
    --------
    index_name : string, index name e.g. 'FeiYan_NewEnergyVehicle'
    base_date : string, base date e.g. '2016-12-30'
    base_point : float, base point e.g. 1000.0
    weight_method : string, weighting method e.g. 'EqualWeight'
                    (the only supported method)
    benchmark_id : string, benchmark index code e.g. '000300'

    Pre-requisites
    --------
    Component list     DataCenter/Index/IndexComponent_%s.csv % index_name
    Benchmark LSHQ     DataCenter/Trading/LSHQ/Trading_LSHQ_Index_%s.csv % benchmark_id
    Component LSHQ     DataCenter/Trading/LSHQ/Trading_LSHQ_Stock_%s.csv % stock_id

    Side effect
    --------
    Saves the resulting index (date-indexed, ascending) to CSV with
    columns ratio/index (custom index) and b_ratio/b_index (benchmark).
    Raises SystemExit when required data is missing or the weight method
    is unsupported.

    Fix: replaced every use of DataFrame.ix (removed in pandas 1.0) with
    positional .iloc access; the manual per-row forward-fill loop became
    the equivalent Series.ffill().
    """
    # Load Benchmark Index
    benchmark = loadDailyQFQ(benchmark_id, True)
    if u.isNoneOrEmpty(benchmark):
        print('Benchmark LSHQ Not Available:', benchmark_id)
        raise SystemExit
    # Load Index Component Stocks
    component = load_component(index_name)
    if u.isNoneOrEmpty(component):
        print('Index Component Not Available:', index_name)
        raise SystemExit
    # Check Weight Method
    if not weight_method == 'EqualWeight':
        print('Un-supported Weight Method:', weight_method)
        raise SystemExit
    # Create Index Dataframe, filtered by base_date
    index = benchmark[benchmark.date >= base_date]
    index = index[['date', 'close']]
    index.set_index('date', inplace=True)
    index.sort_index(ascending=True, inplace=True)
    index_number = len(index)
    # Add New Columns
    for column in ['ratio', 'index', 'b_ratio', 'b_index']:
        index[column] = np.nan
    # Extract Component Stock LSHQ and merge it into the index frame
    component_number = len(component)
    for i in range(component_number):
        stock_id = u.stockID(component.iloc[i]['code'])
        df = loadDailyQFQ(stock_id, False)
        # Check Availability
        if u.isNoneOrEmpty(df):
            print('Stock LSHQ Not Available:', stock_id)
            raise SystemExit
        # Slice Data, filtered by base_date
        stock = df[df.date >= base_date]
        stock = stock[['date', 'close']]
        stock.set_index('date', inplace=True)
        stock.sort_index(ascending=True, inplace=True)
        # Merge Index with Stock; the stock close gets suffix '_<id>'.
        index = pd.merge(index, stock, how='left', left_index=True,
                         right_index=True, sort=True,
                         suffixes=('', '_' + stock_id))
        # Fill Missing Data
        column = 'close_' + stock_id
        pos = index.columns.get_loc(column)
        if np.isnan(index.iloc[0, pos]):  # First data is NaN
            # Seed with the last close on/before base_date.
            index.iloc[0, pos] = find_stock_close(df, base_date)
        # Forward-fill remaining gaps (same effect as the original per-row loop).
        index[column] = index[column].ffill()
    # Calculate Index Value — resolve column positions once, outside the loop.
    ratio_pos = index.columns.get_loc('ratio')
    index_pos = index.columns.get_loc('index')
    b_ratio_pos = index.columns.get_loc('b_ratio')
    b_index_pos = index.columns.get_loc('b_index')
    close_pos = index.columns.get_loc('close')
    stock_pos = [
        index.columns.get_loc('close_' + u.stockID(component.iloc[j]['code']))
        for j in range(component_number)
    ]
    for i in range(index_number):
        # Equal-weighted mean of each component's gain since the base date.
        ratio = 0.0
        for p in stock_pos:
            ratio = ratio + (index.iloc[i, p] / index.iloc[0, p]) - 1.0
        ratio = ratio / float(component_number)
        index.iloc[i, ratio_pos] = ratio
        index.iloc[i, index_pos] = float(base_point) * (1.0 + ratio)
        index.iloc[i, b_ratio_pos] = (index.iloc[i, close_pos] /
                                      index.iloc[0, close_pos]) - 1.0
        index.iloc[i, b_index_pos] = index.iloc[i, close_pos]
    # Save to CSV File
    if not u.isNoneOrEmpty(index):
        u.to_csv(index, c.path_dict['index'],
                 c.file_dict['index_r'] % index_name)
def plot_coefficient_price(stock_ids, allprice, postfix, series_name,
                           benchmark_name):
    """Plot each stock's price relative to its first valid price, against
    the benchmark, and save the figure.

    stock_ids      : list of stock codes; their prices are read from
                     allprice columns 'close_<id>'.
    allprice       : DataFrame with 'date', 'close' (benchmark) and one
                     'close_<id>' column per stock.
    postfix        : string used to build the output figure name.
    series_name    : series label used in the title and figure name.
    benchmark_name : benchmark label (also the benchmark column name).

    Fix: replaced the removed DataFrame.ix cell accesses with positional
    .iloc + columns.get_loc, which preserves the integer-row semantics
    regardless of the frame's index.
    """
    # If want to debug benchmark only (without stocks), set below flag to True.
    debug_benchmark_only = False
    # Extract Stock Prices and Normalize Them
    row_number = len(allprice)
    stock_number = len(stock_ids)
    columns = ['date', benchmark_name]
    if not debug_benchmark_only:
        for i in range(stock_number):
            columns.append(u.stockID(stock_ids[i]))
    prices = u.createDataFrame(row_number, columns)
    prices['date'] = allprice['date']
    prices[benchmark_name] = allprice['close']
    if not debug_benchmark_only:
        for i in range(stock_number):
            stock_id = u.stockID(stock_ids[i])
            prices[stock_id] = allprice['close_' + stock_id]
    if debug_benchmark_only:
        print('Original Price')
        print(prices)
    # Normalize each price column (all but 'date').
    for i in range(1, len(columns)):
        column = columns[i]
        prices[column] = normalize_price(prices[column])
    if debug_benchmark_only:
        print('Normalized Price')
        print(prices)
    # Calculate Relative Price w.r.t. First Valid Price
    for i in range(1, len(columns)):
        column = columns[i]
        pos = prices.columns.get_loc(column)
        row = -1
        for j in range(row_number):
            if not np.isnan(prices.iloc[j, pos]):  # Find first valid price
                row = j
                break
        if row != -1:
            if debug_benchmark_only:
                print('Row =', row)
            # Cache the reference price first — this cell itself is
            # rewritten to 1.0 by the loop below.
            ref_price = prices.iloc[row, pos]
            for j in range(row, row_number):
                cur_price = prices.iloc[j, pos]
                if not np.isnan(cur_price):
                    prices.iloc[j, pos] = 1.0 + (cur_price - ref_price) / ref_price
    if debug_benchmark_only:
        print('Relative Price')
        print(prices)
    # Plot Figure
    fig = plt.figure(figsize=(32, 18), dpi=72, facecolor="white")
    axes = plt.subplot(111)
    axes.cla()  # Clear Axes
    # Define Font
    font = {
        'family': 'serif',
        'color': 'black',
        'weight': 'normal',
        'size': 18,
    }
    # Plot: benchmark as a dashed grey reference line, stocks on top.
    title = '%s vs. %s' % (series_name, benchmark_name)
    plt.title(title, fontdict=font)
    axes.set_xlabel('', fontdict=font)
    axes.set_ylabel('Ratio', fontdict=font)
    prices.plot(x='date', y=benchmark_name, ax=axes, color='grey',
                lw=2.0, ls='--')
    if not debug_benchmark_only:
        for i in range(stock_number):
            column = u.stockID(stock_ids[i])
            prices.plot(x='date', y=column, ax=axes)
    # Common Format
    axes.grid(True)
    fig.autofmt_xdate()
    fig.tight_layout()
    plt.setp(plt.gca().get_xticklabels(), rotation=30)
    plt.show()
    # Save Figure
    fig_key = 'fig_coef'
    fig_path = c.path_dict[fig_key]
    fig_name = '_'.join([postfix, series_name, 'vs', benchmark_name,
                         u.dateToStr(u.today())])
    fig_file = c.file_dict[fig_key] % fig_name
    u.saveFigure(fig, fig_path, fig_file)