def extractRollingBeta(postfix):
    """Extract the rolling beta columns of a coefficient file and average them.

    Reads the strategy CSV identified by ``postfix``, copies its ``date``
    column plus every column whose name starts with ``'beta'`` into a new
    frame, fills the ``beta`` column with the per-row mean of the non-NaN
    copied values, and saves the frame as the strategy CSV for
    ``postfix + '_Beta'``.

    Args:
        postfix: Postfix substituted into ``c.file_dict['strategy']`` to
            locate the coefficient file.

    Returns:
        True when the beta file has been written, False when the coefficient
        file is missing or empty.
    """
    # Load Rolling Coefficient
    fullpath = c.path_dict['strategy'] + c.file_dict['strategy'] % postfix
    coef = u.read_csv(fullpath)
    if u.isNoneOrEmpty(coef):
        print('Require Coefficient File: %s!' % fullpath)
        return False

    # Extract Rolling Beta
    row_number = len(coef)
    beta = u.createDataFrame(row_number, ['date', 'beta'])
    beta['date'] = coef['date']
    for column in coef.columns:
        if column.startswith('beta'):
            beta[column] = coef[column]

    # Calculate Rolling Beta Average
    # Everything after the leading 'date' and 'beta' columns is a per-stock beta.
    beta_columns = list(beta.columns[2:])
    for i in range(row_number):
        row_values = (beta.ix[i, column] for column in beta_columns)
        valid = [value for value in row_values if not np.isnan(value)]
        if valid:
            # Rows without any valid beta keep the frame's initial value.
            beta.ix[i, 'beta'] = sum(valid) / float(len(valid))

    # Save to CSV File
    beta.set_index('date', inplace=True)
    postfix = '_'.join([postfix, 'Beta'])
    u.to_csv(beta, c.path_dict['strategy'], c.file_dict['strategy'] % postfix)
    # Report success explicitly: the failure path returns False, so falling
    # off the end (None) would make success look falsy to callers.
    return True
def histogramAlpha(postfix, completeness_threshold):
    """Count the loaded alpha coefficients per value band and save the counts.

    Loads the coefficient table through ``loadCoefficient`` and writes a
    one-row histogram of its ``alpha`` column to the strategy CSV named
    ``<postfix>_<completeness_threshold>_HistogramAlpha``.

    Bands are 0.1 wide, closed on the lower bound, from ``[0.4, +inf)``
    ('Very High') down to ``(-inf, -0.4)`` ('Negative Very High').  ``Total``
    is the number of loaded rows.  NaN alphas are not assigned to any band:
    NaN fails every comparison, so without an explicit check it would be
    counted as 'Negative Very High'.

    Args:
        postfix: Postfix of the coefficient file, forwarded to
            ``loadCoefficient``.
        completeness_threshold: Forwarded to ``loadCoefficient`` and joined
            into the output file name, so it must be a string.

    Returns:
        True when the histogram has been written, False when no coefficient
        data could be loaded.
    """
    # Load Coefficient File
    allcoef = loadCoefficient(postfix, completeness_threshold)
    if u.isNoneOrEmpty(allcoef):
        return False

    # Calculate Coefficient Histogram
    # (inclusive lower bound, label), highest band first; the first match wins.
    bands = [
        (0.4, 'Very High'),  # [0.4, +Infinity)
        (0.3, 'High'),  # [0.3, 0.4)
        (0.2, 'Medium'),  # [0.2, 0.3)
        (0.1, 'Low'),  # [0.1, 0.2)
        (0.0, 'Very Low'),  # [0.0, 0.1)
        (-0.1, 'Negative Very Low'),  # [-0.1, 0.0)
        (-0.2, 'Negative Low'),  # [-0.2, -0.1)
        (-0.3, 'Negative Medium'),  # [-0.3, -0.2)
        (-0.4, 'Negative High'),  # [-0.4, -0.3)
    ]
    lowest_label = 'Negative Very High'  # (-Infinity, -0.4)
    columns = ['Total'] + [label for _, label in bands] + [lowest_label]
    histogram = u.createDataFrame(1, columns, 0)
    stock_number = len(allcoef)
    histogram.ix[0, 'Total'] = stock_number
    for i in range(stock_number):
        alpha = allcoef.ix[i, 'alpha']
        if np.isnan(alpha):
            # Undefined alpha belongs to no band.
            continue
        for lower_bound, label in bands:
            if alpha >= lower_bound:
                break
        else:
            label = lowest_label
        histogram.ix[0, label] = histogram.ix[0, label] + 1

    # Save to CSV File
    histogram.set_index('Total', inplace=True)
    file_postfix = '_'.join(
        [postfix, completeness_threshold, 'HistogramAlpha'])
    u.to_csv(histogram, c.path_dict['strategy'],
             c.file_dict['strategy'] % file_postfix)
    return True
def histogramCorrelation(postfix, completeness_threshold):
    """Count the loaded correlation coefficients per band and save the counts.

    Loads the coefficient table through ``loadCoefficient`` and writes a
    one-row histogram of its ``correlation`` column to the strategy CSV named
    ``<postfix>_<completeness_threshold>_HistogramCorrelation``.

    Bands are 0.2 wide and open on the lower bound, except 'Very Weak', which
    is ``[0.0, 0.2]``; values at or below -0.8 are 'Negative Very Strong'.
    ``Total`` is the number of loaded rows.  NaN correlations are not assigned
    to any band: NaN fails every comparison, so without an explicit check it
    would be counted as 'Negative Very Strong'.

    Args:
        postfix: Postfix of the coefficient file, forwarded to
            ``loadCoefficient``.
        completeness_threshold: Forwarded to ``loadCoefficient`` and joined
            into the output file name, so it must be a string.

    Returns:
        True when the histogram has been written, False when no coefficient
        data could be loaded.
    """
    # Load Coefficient File
    allcoef = loadCoefficient(postfix, completeness_threshold)
    if u.isNoneOrEmpty(allcoef):
        return False

    # Calculate Coefficient Histogram
    # (lower bound, bound is inclusive, label), strongest positive band first.
    bands = [
        (0.8, False, 'Very Strong'),  # (0.8, 1.0]
        (0.6, False, 'Strong'),  # (0.6, 0.8]
        (0.4, False, 'Medium'),  # (0.4, 0.6]
        (0.2, False, 'Weak'),  # (0.2, 0.4]
        (0.0, True, 'Very Weak'),  # [0.0, 0.2]
        (-0.2, False, 'Negative Very Weak'),  # (-0.2, 0.0)
        (-0.4, False, 'Negative Weak'),  # (-0.4, -0.2]
        (-0.6, False, 'Negative Medium'),  # (-0.6, -0.4]
        (-0.8, False, 'Negative Strong'),  # (-0.8, -0.6]
    ]
    lowest_label = 'Negative Very Strong'  # [-1.0, -0.8]
    columns = ['Total'] + [label for _, _, label in bands] + [lowest_label]
    histogram = u.createDataFrame(1, columns, 0)
    stock_number = len(allcoef)
    histogram.ix[0, 'Total'] = stock_number
    for i in range(stock_number):
        correlation = allcoef.ix[i, 'correlation']
        if np.isnan(correlation):
            # Undefined correlation belongs to no band.
            continue
        for lower_bound, inclusive, label in bands:
            if correlation > lower_bound or (inclusive
                                             and correlation == lower_bound):
                break
        else:
            label = lowest_label
        histogram.ix[0, label] = histogram.ix[0, label] + 1

    # Save to CSV File
    histogram.set_index('Total', inplace=True)
    file_postfix = '_'.join(
        [postfix, completeness_threshold, 'HistogramCorrelation'])
    u.to_csv(histogram, c.path_dict['strategy'],
             c.file_dict['strategy'] % file_postfix)
    return True
def generateIndexStatistics(index_names, series_name):
    """Summarise the latest result row of each index and save the summary.

    For every name in ``index_names`` the stored result is loaded through
    ``load_index_result`` and its last row is reduced to one statistics row:
    name, latest date, component / suspended counts (from
    ``generateComponentStatistics``), index and benchmark levels, their
    ratios and the ratio difference.  The table is printed and written to the
    ``index_s`` CSV for ``series_name``.

    Indexes whose result is missing or empty contribute no row.  Previously
    such an index left an unfilled placeholder row behind, which then went
    through the integer and ``'%.2f'`` formatting below and into the CSV.

    Args:
        index_names: Iterable of index names to summarise.
        series_name: Substituted into ``c.file_dict['index_s']`` to build the
            output file name.

    Returns:
        None.  Nothing is printed or saved when no index has a result.
    """
    # Create Statistics Records
    columns = [
        'name', 'date', 'c_number', 's_number', 'index', 'ratio', 'b_index',
        'b_ratio', 'delta_ratio'
    ]
    records = []

    # Calculate Index Statistics
    for index_name in index_names:
        result = load_index_result(index_name)
        if u.isNoneOrEmpty(result):
            continue
        last_row = len(result) - 1
        latest_date = result.ix[last_row, 'date']
        component_number, suspended_number = generateComponentStatistics(
            index_name, latest_date)
        ratio = result.ix[last_row, 'ratio']
        b_ratio = result.ix[last_row, 'b_ratio']
        records.append({
            'name': index_name,
            'date': latest_date,
            'c_number': component_number,
            's_number': suspended_number,
            'index': result.ix[last_row, 'index'],
            'ratio': ratio,
            'b_index': result.ix[last_row, 'b_index'],
            'b_ratio': b_ratio,
            'delta_ratio': ratio - b_ratio,
        })
    if not records:
        return
    stat = pd.DataFrame(records, columns=columns)

    # Format Statistics DataFrame
    for column in ['name', 'date']:
        stat[column] = stat[column].astype(str)
    for column in ['c_number', 's_number']:
        stat[column] = stat[column].astype(int)
    for column in ['index', 'b_index']:
        # Round-trip through '%.2f' to round to two decimals.
        stat[column] = stat[column].map(lambda x: '%.2f' % x)
        stat[column] = stat[column].astype(float)
    for column in ['ratio', 'b_ratio', 'delta_ratio']:
        # Ratios are stored as percentage strings, e.g. 0.1234 -> '12.34%'.
        stat[column] = stat[column].map(lambda x: '%.2f%%' % (x * 100.0))
    stat.set_index('name', inplace=True)

    # Save to CSV File
    if not u.isNoneOrEmpty(stat):
        print(stat)
        u.to_csv(stat, c.path_dict['index'],
                 c.file_dict['index_s'] % series_name)
def generateIndexSeries(index_names, series_name):
    """Merge the result series of several indexes into one table and save it.

    The first index supplies the ``date`` column and the benchmark columns
    (``index_benchmark`` from ``b_index``, ``ratio_benchmark`` as
    ``1.0 + b_ratio``).  Every index then contributes ``index_<name>`` and
    ``ratio_<name>`` (``1.0 + ratio``).  Float columns are rounded to two
    decimals and the table is written to the ``index_r`` CSV for
    ``series_name``.

    Each index result is loaded once.  Columns are copied as whole columns,
    so an index with fewer rows than the first one yields NaN for the rows it
    lacks, and an index with no result yields all-NaN columns, instead of
    failing in the middle of the merge.

    Args:
        index_names: Sequence of index names; the first one defines the date
            axis and the benchmark.
        series_name: Substituted into ``c.file_dict['index_r']`` to build the
            output file name.

    Returns:
        None.  Nothing is saved when ``index_names`` is empty or the first
        index has no result.
    """
    names = list(index_names)
    if len(names) == 0:
        print('No Index Names to Generate Index Series!')
        return

    # Load every index result once; the first one drives the row axis.
    results = [load_index_result(name) for name in names]
    base = results[0]
    if u.isNoneOrEmpty(base):
        print('Require Index Result: %s!' % names[0])
        return

    # Generate Index Series DataFrame
    columns = ['date', 'index_benchmark', 'ratio_benchmark']
    str_columns = ['date']
    float_columns = ['index_benchmark', 'ratio_benchmark']
    for name in names:
        for prefix in ['index_', 'ratio_']:
            columns.append(prefix + name)
            float_columns.append(prefix + name)
    row_number = len(base)
    series = u.createDataFrame(row_number, columns)
    print(series)

    # Init Series with Common Columns
    series['date'] = base['date']
    series['index_benchmark'] = base['b_index']
    series['ratio_benchmark'] = 1.0 + base['b_ratio']

    # Merge Separate Index Data into Index Series
    for name, result in zip(names, results):
        if u.isNoneOrEmpty(result):
            series['index_' + name] = np.nan
            series['ratio_' + name] = np.nan
            continue
        series['index_' + name] = result['index']
        series['ratio_' + name] = 1.0 + result['ratio']

    # Format Columns
    for column in str_columns:
        series[column] = series[column].astype(str)
    for column in float_columns:
        # Round-trip through '%.2f' to round to two decimals.
        series[column] = series[column].map(lambda x: '%.2f' % x)
        series[column] = series[column].astype(float)
    series.set_index('date', inplace=True)

    # Save to CSV File
    if not u.isNoneOrEmpty(series):
        u.to_csv(series, c.path_dict['index'],
                 c.file_dict['index_r'] % series_name)
def strategyPriceFollow(stock_id, is_index, trend_threshold):
    """Label every trading day of one stock/index as an up- or down-trend.

    Walks the daily close prices once.  An up-trend flips to 'Down' when the
    close falls more than ``trend_threshold`` (a fraction) below the running
    high of that trend; a down-trend flips to 'Up' when the close rises more
    than ``trend_threshold`` above the running low.  Columns added to the
    price frame:

    * ``trend``       - 'Up' / 'Down' label of the day.
    * ``trend_high``  - running high while in an up-trend.
    * ``trend_low``   - running low while in a down-trend.
    * ``trend_ref``   - extreme of the previous trend, set at each reversal
      and carried forward while the trend continues.
    * ``predict``     - value in [-1.0, 1.0] computed on the day itself.
    * ``confirm``     - +1.0 / -1.0 written retroactively once a new extreme
      or a reversal settles the direction of the preceding days.
    * ``trend_price`` - close price linearly interpolated between
      consecutive trend extremes.

    Two CSV files are written: the turning points ('Timing_...', gbk
    encoded) and the full annotated frame ('PriceFollow_...').

    Args:
        stock_id: Identifier passed to ``loadDailyQFQ`` and
            ``u.stockFileName``.
        is_index: Passed to the same two helpers alongside ``stock_id``.
        trend_threshold: Relative price move (fraction) that reverses a
            trend; also used in the output file names.

    Raises:
        SystemExit: When no daily data could be loaded.
    """
    # Load Stock Daily QFQ Data
    lshq = loadDailyQFQ(stock_id, is_index)
    if u.isNoneOrEmpty(lshq):
        raise SystemExit

    # Calculate Trend, Trend High, Trend Low, Trend Ref
    lshq['trend'] = 'Up'
    for column in ['trend_high', 'trend_low', 'trend_ref']:
        lshq[column] = 0.0
    for column in ['predict', 'confirm']:
        lshq[column] = np.nan
    lshq_number = len(lshq)
    trends = []  # Direction of each trend segment, in order
    trend_turning_points = []  # Row at which each segment was detected
    trend_index_highs = []  # Row of the high that ends each up-trend
    trend_index_lows = []  # Row of the low that ends each down-trend
    index_high = 0  # Row of the running high of the current up-trend
    index_low = 0  # Row of the running low of the current down-trend
    for i in range(lshq_number):
        if i == 0:  # Initialization
            lshq.ix[i, 'trend'] = 'Up'
            trends.append('Up')
            trend_turning_points.append(i)
            for column in ['trend_high', 'trend_low', 'trend_ref']:
                lshq.ix[i, column] = lshq.ix[i, 'close']
        else:
            # State of the previous day drives today's decision.
            trend = lshq.ix[i - 1, 'trend']
            trend_high = lshq.ix[i - 1, 'trend_high']
            trend_low = lshq.ix[i - 1, 'trend_low']
            trend_ref = lshq.ix[i - 1, 'trend_ref']
            trend_cur = lshq.ix[i, 'close']
            up_to_down = False
            down_to_up = False
            # Step 1: decide whether today reverses the trend.
            if trend == 'Up':
                if (1.0 - trend_cur / trend_high) > trend_threshold:
                    lshq.ix[i, 'trend'] = 'Down'
                    up_to_down = True
                    trends.append('Down')
                    trend_turning_points.append(i)
                    trend_index_highs.append(index_high)
                else:
                    lshq.ix[i, 'trend'] = 'Up'
                    up_to_down = False
            else:
                if (trend_cur / trend_low - 1.0) > trend_threshold:
                    lshq.ix[i, 'trend'] = 'Up'
                    down_to_up = True
                    trends.append('Up')
                    trend_turning_points.append(i)
                    trend_index_lows.append(index_low)
                else:
                    lshq.ix[i, 'trend'] = 'Down'
                    down_to_up = False
            # Step 2: update extremes, prediction and confirmation.
            if trend == 'Up':
                if up_to_down == False:  # Up trend continues
                    if trend_cur > trend_high:
                        lshq.ix[i, 'predict'] = 1.0
                        for j in range(
                                index_high + 1, i + 1
                        ):  # New high confirms all trades since last high to be up-trend
                            lshq.ix[j, 'confirm'] = 1.0
                        lshq.ix[i, 'trend_high'] = trend_cur
                        index_high = i
                    else:
                        # Share of the reversal threshold already consumed
                        # by the drawdown from the running high.
                        ratio = (1.0 - trend_cur / trend_high) / trend_threshold
                        lshq.ix[i, 'predict'] = 1.0 * (1.0 - ratio) + (
                            -1.0) * ratio  # Map to [1.0, -1.0]
                        lshq.ix[i, 'trend_high'] = trend_high
                    lshq.ix[i, 'trend_ref'] = trend_ref
                else:  # Up trend reverses
                    lshq.ix[i, 'predict'] = -1.0
                    for j in range(
                            index_high + 1, i + 1
                    ):  # Turning point confirms all trades since last high to be down-trend
                        lshq.ix[j, 'confirm'] = -1.0
                    lshq.ix[i, 'trend_ref'] = trend_high
                    lshq.ix[i, 'trend_low'] = trend_cur
                    index_low = i
            else:
                if down_to_up == False:  # Down trend continues
                    if trend_cur < trend_low:
                        lshq.ix[i, 'predict'] = -1.0
                        for j in range(
                                index_low + 1, i + 1
                        ):  # New low confirms all trades since last low to be down-trend
                            lshq.ix[j, 'confirm'] = -1.0
                        lshq.ix[i, 'trend_low'] = trend_cur
                        index_low = i
                    else:
                        # Share of the reversal threshold already consumed
                        # by the rebound from the running low.
                        ratio = (trend_cur / trend_low - 1.0) / trend_threshold
                        lshq.ix[i, 'predict'] = (-1.0) * (1.0 - ratio) + (
                            1.0) * ratio  # Map to [1.0, -1.0]
                        lshq.ix[i, 'trend_low'] = trend_low
                    lshq.ix[i, 'trend_ref'] = trend_ref
                else:  # Down trend reverses
                    lshq.ix[i, 'predict'] = 1.0
                    for j in range(
                            index_low + 1, i + 1
                    ):  # Turning point confirms all trades since last low to be up-trend
                        lshq.ix[j, 'confirm'] = 1.0
                    lshq.ix[i, 'trend_ref'] = trend_low
                    lshq.ix[i, 'trend_high'] = trend_cur
                    index_high = i
        # Handle Last Trend
        # The open trend has no reversal yet, so the last row closes it.
        if i == lshq_number - 1:
            if lshq.ix[i, 'trend'] == 'Up':
                trend_index_highs.append(i)
            else:
                trend_index_lows.append(i)

    # Calculate Trend Price
    lshq['trend_price'] = 0.0
    trend_number = len(trends)
    print('Trend # =', trend_number)
    index_ref = 0  # Row where the current segment starts
    index_tar = 0  # Row of the extreme that ends the current segment
    price_ref = 0.0
    price_tar = 0.0
    idx_high = 0  # Next unused entry of trend_index_highs
    idx_low = 0  # Next unused entry of trend_index_lows
    for i in range(trend_number):
        trend = trends[i]
        index_tar = trend_index_highs[
            idx_high] if trend == 'Up' else trend_index_lows[idx_low]
        price_ref = lshq.ix[index_ref, 'close']
        price_tar = lshq.ix[index_tar, 'close']
        # Linear interpolation from the segment start up to, but excluding,
        # its target row; the range is empty when both rows coincide.
        for index in range(index_ref, index_tar):
            ratio = float(index - index_ref) / float(index_tar - index_ref)
            lshq.ix[
                index,
                'trend_price'] = price_ref * (1.0 - ratio) + price_tar * ratio
        # The extreme that ended this segment starts the next one.
        if trend == 'Up':
            index_ref = trend_index_highs[idx_high]
            idx_high = idx_high + 1
        else:
            index_ref = trend_index_lows[idx_low]
            idx_low = idx_low + 1
        # Handle Last Trend
        # The loop above excludes the target row, so write it explicitly.
        if i == trend_number - 1:
            lshq.ix[index_tar, 'trend_price'] = price_tar

    # Record Timing Data
    trend_number = len(trends)
    timing = u.createDataFrame(trend_number, ['date', 'trend'])
    for i in range(trend_number):
        trend = trends[i]
        index = trend_turning_points[i]
        timing.ix[i, 'date'] = lshq.ix[index, 'date']
        timing.ix[i, 'trend'] = trend
    timing.set_index('date', inplace=True)
    timing.sort_index(ascending=True, inplace=True)

    # Save to CSV File
    file_postfix = 'Timing_%s_%s' % (u.stockFileName(
        stock_id, is_index), trend_threshold)
    u.to_csv(timing,
             c.path_dict['strategy'],
             file_postfix + '.csv',
             encoding='gbk')

    # Format Data Frame
    for column in ['trend_high', 'trend_low', 'trend_ref', 'trend_price']:
        # Round-trip through '%.3f' to round to three decimals.
        lshq[column] = lshq[column].map(lambda x: '%.3f' % x)
        lshq[column] = lshq[column].astype(float)
    lshq.set_index('date', inplace=True)
    lshq.sort_index(ascending=True, inplace=True)

    # Save to CSV File
    file_postfix = 'PriceFollow_%s_%s' % (u.stockFileName(
        stock_id, is_index), trend_threshold)
    u.to_csv(lshq, c.path_dict['strategy'],
             c.file_dict['strategy'] % file_postfix)
def mergePriceFollow(stock_list, is_index, threshold_list):
    """Merge per-threshold price-follow results and derive weighted trends.

    For every stock the 'PriceFollow_<stock>_<threshold>' CSV of each
    threshold is read and the ``trend``, ``trend_price``, ``predict`` and
    ``confirm`` columns are copied side by side with a ``_<threshold>``
    suffix.  For every threshold that has a neighbour on both sides in
    ``threshold_list``, a ``wpredict_<threshold>`` column (threshold-weighted
    mean of the three neighbouring ``predict`` columns, from the second row
    on) and a ``wtrend_<threshold>`` column ('Up' when the weighted value is
    at least 0.0, otherwise 'Down') are added.  The merged frame is saved as
    'PriceFollow_<stock>_All'; the last row of every stock is collected into
    'PriceFollow_Statistics'.

    Args:
        stock_list: Sequence of stock/index identifiers.
        is_index: Passed to ``u.stockFileName`` together with each identifier.
        threshold_list: Sequence of numeric trend thresholds.

    Raises:
        SystemExit: When ``stock_list`` or ``threshold_list`` is empty.
    """
    stock_number = len(stock_list)
    if stock_number < 1:
        print('Stock Number:', stock_number)
        raise SystemExit
    threshold_number = len(threshold_list)
    if threshold_number < 1:
        print('Threshold Number:', threshold_number)
        raise SystemExit

    # Thresholds with a neighbour on each side get weighted columns.
    inner_positions = list(range(1, threshold_number - 1))

    # Init Price Follow Statistics for All Indexes
    stats_columns = ['date', 'index']
    for pos in inner_positions:
        for prefix in ('wpredict', 'wtrend'):
            stats_columns.append('%s_%s' % (prefix, threshold_list[pos]))
    stats = u.createDataFrame(stock_number, stats_columns)

    copied_columns = ('trend', 'trend_price', 'predict', 'confirm')
    cutoff = 0.0  # Optimized cutoff for weighted predict

    for s in range(stock_number):
        stock_id = stock_list[s]

        # Load Results from Different Threshold
        frames = []
        for pos in range(threshold_number):
            source_postfix = 'PriceFollow_%s_%s' % (u.stockFileName(
                stock_id, is_index), threshold_list[pos])
            source_path = c.path_dict[
                'strategy'] + c.file_dict['strategy'] % source_postfix
            frames.append(u.read_csv(source_path))

        # Compose Final Results: the shared columns come from the first
        # frame, followed by one block of per-threshold columns per name.
        merged = frames[0].drop([
            'trend', 'trend_high', 'trend_low', 'trend_ref', 'trend_price',
            'predict', 'confirm'
        ],
                                axis=1)
        for name in copied_columns:
            for pos in range(threshold_number):
                merged['%s_%s' %
                       (name, threshold_list[pos])] = frames[pos][name]

        # Weighted Predict Columns
        row_number = len(merged)
        for pos in inner_positions:
            neighbours = [
                threshold_list[pos - 1], threshold_list[pos],
                threshold_list[pos + 1]
            ]
            t_total = neighbours[0] + neighbours[1] + neighbours[2]
            wpredict_column = 'wpredict_%s' % neighbours[1]
            wtrend_column = 'wtrend_%s' % neighbours[1]
            merged[wpredict_column] = np.nan
            merged[wtrend_column] = np.nan
            # The first row is left as NaN.
            for row in range(1, row_number):
                weighted = 0.0
                for t in neighbours:
                    weighted = weighted + t * merged.ix[row, 'predict_%s' % t]
                weighted = weighted / t_total
                merged.ix[row, wpredict_column] = weighted
                if weighted >= cutoff:
                    merged.ix[row, wtrend_column] = 'Up'
                else:
                    merged.ix[row, wtrend_column] = 'Down'

        # Fill One Row of Statistics
        last_row = len(merged) - 1
        stats.ix[s, 'date'] = merged.ix[last_row, 'date']
        stats.ix[s, 'index'] = stock_id
        for pos in inner_positions:
            for prefix in ('wpredict', 'wtrend'):
                name = '%s_%s' % (prefix, threshold_list[pos])
                stats.ix[s, name] = merged.ix[last_row, name]

        # Format Columns
        merged.set_index('date', inplace=True)

        # Save to CSV File
        merged_postfix = 'PriceFollow_%s_All' % u.stockFileName(
            stock_id, is_index)
        u.to_csv(merged, c.path_dict['strategy'],
                 c.file_dict['strategy'] % merged_postfix)

    # Format Columns
    stats.set_index('date', inplace=True)

    # Save to CSV File
    stats_postfix = 'PriceFollow_Statistics'
    u.to_csv(stats, c.path_dict['strategy'],
             c.file_dict['strategy'] % stats_postfix)
def strategyRelativity(benchmark_id, date_start, date_end, period, stock_ids,
                       is_index, stock_name):
    '''
    Purpose:
    --------
    For the given date range and sampling frequency, compute the relative
    strength of every stock/index in the list against the benchmark.

    Assumption: the list of indexes, their historical forward-adjusted (QFQ)
    data and the benchmark's historical forward-adjusted data have already
    been fetched and stored as CSV files.

    Inputs:
    --------
    benchmark_id : string, index code e.g. '000300'
    date_start : string, start date e.g. '2005-01-01'
    date_end : string, end date e.g. '2016-12-31'
    period : string, sampling period e.g. 'M'
    stock_ids : pandas.Series or list, stock/index list
    is_index : boolean, stock/index flag
    stock_name : string, stock/index name

    Outputs:
    --------
    True/False : boolean, whether the strategy ran to completion

    Data file
        Strategy_Relativity_DateStart_DateEnd_Period_StockName_vs_Benchmark.csv :
        relative strength of all stocks/indexes taking part in the calculation
    '''
    # Check Period
    if not checkPeriod(period):
        return False

    # Sample Prices
    price = samplePrice(benchmark_id, stock_ids, is_index, date_start,
                        date_end, period)
    if u.isNoneOrEmpty(price):
        return False

    # Output Data
    df = price.copy()

    # Calculate Relativity
    date_number = len(price)
    column_number = len(price.columns)
    relativity = u.createDataFrame(date_number, price.columns, np.nan)
    relativity['date'] = price['date']
    # Column 0 is 'date' and column 1 is the benchmark 'close'.
    value_columns = [relativity.columns[k] for k in range(1, column_number)]
    stock_columns = [relativity.columns[k] for k in range(2, column_number)]

    # 1. Turn Prices to Ratios (period-over-period change; the first row is
    #    compared with itself)
    for column in value_columns:
        for row in range(date_number):
            prev_price = price.ix[max(row - 1, 0), column]
            curr_price = price.ix[row, column]
            if np.isnan(prev_price) or np.isnan(curr_price):
                continue  # Leave NaN unless both prices are valid
            relativity.ix[row, column] = (curr_price -
                                          prev_price) / prev_price
        df['ratio_' + column[-6:]] = relativity[column]

    # 2. Turn Ratios to Deltas (stock ratio minus benchmark ratio)
    for column in stock_columns:
        for row in range(date_number):
            ratio_stock = relativity.ix[row, column]
            ratio_bench = relativity.ix[row, 'close']
            if np.isnan(ratio_stock) or np.isnan(ratio_bench):
                continue  # Keep the value unless both ratios are valid
            relativity.ix[row, column] = ratio_stock - ratio_bench
        df['delta_' + column[-6:]] = relativity[column]

    # 3. Turn Deltas to Accumulated Deltas (running sum over valid
    #    neighbouring rows)
    for column in stock_columns:
        for row in range(date_number):
            prev_delta = relativity.ix[max(row - 1, 0), column]
            curr_delta = relativity.ix[row, column]
            if np.isnan(prev_delta) or np.isnan(curr_delta):
                continue  # Keep the value unless both deltas are valid
            relativity.ix[row, column] = prev_delta + curr_delta
        df['accumu_' + column[-6:]] = relativity[column]

    # Save to CSV File
    df.set_index('date', inplace=True)
    name_parts = [
        'Relativity', date_start, date_end, period, stock_name, 'vs',
        benchmark_id
    ]
    file_postfix = '_'.join(name_parts)
    u.to_csv(df, c.path_dict['strategy'],
             c.file_dict['strategy'] % file_postfix)
    return True
def calculateCoefficient(price, ignore_number, min_period_number,
                         ratio_method):
    """Compute correlation, beta, alpha and completeness of every stock.

    ``price`` holds a date column, the benchmark close in its second column
    and one close column per stock after that.  Prices are turned into
    ratios with ``dataToRatio``; stock ratios additionally go through
    ``ignoreData``.  Rows where either ratio is NaN are dropped before a
    stock is compared with the benchmark, and correlation, beta and alpha are
    only computed when at least ``min_period_number`` rows remain.

    Args:
        price: Price frame laid out as described above.
        ignore_number: Passed to ``ignoreData`` for every stock.
        min_period_number: Minimum number of paired ratios required.
        ratio_method: Passed to ``dataToRatio``.

    Returns:
        A frame indexed by stock code with the columns ``completeness`` (a
        percentage string), ``alpha``, ``beta`` and ``correlation`` (rounded
        to three decimals), or None when ``price`` holds no stock column.
    """
    # Create Coefficient Data Frame
    stocks_number = len(price.columns) - 2  # Remove 'date', 'close_benchmark'
    if stocks_number <= 0:
        print('No Stock Data to Calculate Coefficient!')
        return None
    coef = u.createDataFrame(
        stocks_number,
        columns=['code', 'completeness', 'alpha', 'beta', 'correlation'])
    stock_columns = [price.columns[k + 2] for k in range(stocks_number)]

    def paired_ratios(bench_ratio, column):
        # Benchmark/stock ratio pairs of one stock with NaN rows removed.
        stock_ratio = dataToRatio(price[column].copy(), ratio_method)
        # Manually ignore a given number of valid data (since IPO)
        stock_ratio = ignoreData(stock_ratio, ignore_number)
        paired = pd.DataFrame({
            'bench_ratio': bench_ratio,
            'stock_ratio': stock_ratio
        })
        paired = paired.dropna(axis=0, how='any')
        return paired.reset_index(drop=True)

    # Calculate Coefficients
    # 1. Calculate Correlation - No need to interpolate price for stop trading
    bench_ratio = dataToRatio(price[price.columns[1]], ratio_method)
    for i, column in enumerate(stock_columns):
        paired = paired_ratios(bench_ratio, column)
        if len(paired) >= min_period_number:
            # Has sufficient data, exclude those IPO recently.
            coef.ix[i, 'correlation'] = paired['bench_ratio'].corr(
                paired['stock_ratio'])
        # Calculate completeness
        coef.ix[i, 'code'] = column.replace('close_', '')
        null_count = price[column].isnull().sum()
        coef.ix[i, 'completeness'] = 1.0 - float(null_count) / len(
            price[column])

    # 2. Calculate Alpha and Beta - Need to interpolate price for stop trading
    bench_ratio = dataToRatio(price[price.columns[1]], ratio_method)
    for i, column in enumerate(stock_columns):
        paired = paired_ratios(bench_ratio, column)
        if len(paired) < min_period_number:
            continue
        # Compute Beta w.r.t. Benchmark as covariance over benchmark variance.
        b_values = paired['bench_ratio']
        s_values = paired['stock_ratio']
        b_mean = b_values.mean()
        s_mean = s_values.mean()
        covariance = 0.0
        variance = 0.0
        for b_value, s_value in zip(b_values.values, s_values.values):
            covariance += (b_value - b_mean) * (s_value - s_mean)
            variance += (b_value - b_mean) * (b_value - b_mean)
        beta = covariance / variance
        coef.ix[i, 'beta'] = beta
        # Calculate Alpha
        coef.ix[i, 'alpha'] = s_mean - beta * b_mean

    # Format Columns
    coef.set_index('code', inplace=True)
    for column in ['alpha', 'beta', 'correlation']:
        # Round-trip through '%.3f' to round to three decimals.
        rounded = coef[column].map(lambda x: '%.3f' % x)
        coef[column] = rounded.astype(float)
    coef['completeness'] = coef['completeness'].map(lambda x: ('%.2f' %
                                                               (x * 100)) + '%')
    return coef
def calculateCoefficientRolling(price, rolling_number, min_period_number,
                                ratio_method):
    """Compute rolling correlation, beta, alpha and completeness per stock.

    ``price`` holds a date column, the benchmark close (column ``'close'``,
    second position) and one ``close_<id>`` column per stock.  For every row
    that has at least ``rolling_number`` rows up to and including itself, the
    last ``rolling_number`` rows form a window.  Inside the window prices are
    turned into ratios with ``dataToRatio`` and rows where either ratio is
    NaN are dropped.  Correlation, beta and alpha are computed when at least
    ``min_period_number`` rows remain.

    Completeness is the share of non-null prices inside the window.  It was
    previously divided by the length of the whole price series, which
    understated the missing share whenever the window was shorter than the
    series.  Both coefficient groups are now computed from one pass over each
    window instead of rebuilding identical windows twice.

    Args:
        price: Price frame laid out as described above.
        rolling_number: Number of rows per window.
        min_period_number: Minimum number of paired ratios required.
        ratio_method: Passed to ``dataToRatio``.

    Returns:
        A frame indexed by date with the benchmark close and, per stock, the
        close price, ``completeness_<id>`` (percentage string or NaN) and
        ``alpha_<id>``, ``beta_<id>``, ``correlation_<id>`` rounded to three
        decimals; None when ``price`` holds no stock column.
    """
    # Create Coefficient Data Frame
    stocks_number = len(price.columns) - 2  # Remove 'date', 'close_benchmark'
    if stocks_number <= 0:
        print('No Stock Data to Calculate Coefficient!')
        return None
    date_number = len(price)
    stock_columns = [price.columns[k + 2] for k in range(stocks_number)]
    stock_ids = [column.replace('close_', '') for column in stock_columns]
    coef_columns = ['date', 'close']
    for stock_id in stock_ids:
        for item in ['close', 'completeness', 'alpha', 'beta', 'correlation']:
            coef_columns.append('_'.join([item, stock_id]))
    coef = u.createDataFrame(date_number, columns=coef_columns)
    coef['date'] = price['date']
    coef['close'] = price['close']
    for column in stock_columns:
        coef[column] = price[column]

    # Calculate Coefficients - No need to interpolate price_rolling for stop
    # trading
    for i in range(date_number):
        if i + 1 < rolling_number:
            continue  # Not enough history for a full window yet
        price_rolling = price.iloc[i + 1 - rolling_number:i + 1, :]
        price_rolling = price_rolling.reset_index(drop=True)
        window_length = len(price_rolling)
        benchmark = price_rolling[price_rolling.columns[1]]
        bench_ratio = dataToRatio(benchmark, ratio_method)
        for column, stock_id in zip(stock_columns, stock_ids):
            # Turn price to ratio
            stock_ratio = dataToRatio(price_rolling[column].copy(),
                                      ratio_method)
            # Compose data frame and drop NaN
            df = pd.DataFrame({
                'bench_ratio': bench_ratio,
                'stock_ratio': stock_ratio
            })
            df = df.dropna(axis=0, how='any')
            df = df.reset_index(drop=True)
            if len(df) >= min_period_number:  # Has sufficient data
                b_ratio = df['bench_ratio']
                s_ratio = df['stock_ratio']
                coef.ix[i, '_'.join(['correlation', stock_id
                                     ])] = b_ratio.corr(s_ratio)
                # Compute Beta w.r.t. Benchmark as covariance over
                # benchmark variance.
                b_mean = b_ratio.mean()
                s_mean = s_ratio.mean()
                covariance = 0.0
                variance = 0.0
                for b_value, s_value in zip(b_ratio.values, s_ratio.values):
                    covariance += (b_value - b_mean) * (s_value - s_mean)
                    variance += (b_value - b_mean) * (b_value - b_mean)
                beta = covariance / variance
                coef.ix[i, '_'.join(['beta', stock_id])] = beta
                # Calculate Alpha
                coef.ix[i, '_'.join(['alpha', stock_id
                                     ])] = s_mean - beta * b_mean
            # Calculate completeness relative to the window itself
            if window_length > 0:
                null_count = price_rolling[column].isnull().sum()
                coef.ix[i, '_'.join(['completeness', stock_id
                                     ])] = 1.0 - float(null_count) / window_length

    # Format Columns
    coef.set_index('date', inplace=True)
    for stock_id in stock_ids:
        for item in ['alpha', 'beta', 'correlation']:
            column = '_'.join([item, stock_id])
            # Round-trip through '%.3f' to round to three decimals.
            coef[column] = coef[column].map(lambda x: '%.3f' % x)
            coef[column] = coef[column].astype(float)
        column = '_'.join(['completeness', stock_id])
        # Rows without a full window keep NaN instead of a percentage string.
        coef[column] = coef[column].map(lambda x: ('%.2f' % (x * 100)) + '%'
                                        if not np.isnan(x) else np.nan)
    return coef
def plot_coefficient_price(stock_ids, allprice, postfix, series_name,
                           benchmark_name):
    """Plot the stocks' relative prices against the benchmark and save it.

    The benchmark (``allprice['close']``) and every stock
    (``allprice['close_<id>']``) are passed through ``normalize_price`` and
    then rescaled so that the first valid value of each series becomes 1.0.
    All series are drawn over the date axis on one figure, which is shown
    and then saved through ``u.saveFigure``.

    Args:
        stock_ids: Sequence of stock identifiers; each is converted with
            ``u.stockID`` to find its price column.
        allprice: Frame with ``date``, ``close`` and ``close_<id>`` columns.
        postfix: Leading part of the figure file name.
        series_name: Name of the plotted series; used in the title and the
            file name.
        benchmark_name: Label of the benchmark series; used as column name,
            in the title and in the file name.
    """
    # If want to debug benchmark only (without stocks), set below flag to True.
    debug_benchmark_only = False

    def debug_dump(caption, frame):
        # Print an intermediate frame when debugging the benchmark alone.
        if debug_benchmark_only:
            print(caption)
            print(frame)

    # Extract Stock Prices and Normalize Them
    row_number = len(allprice)
    stock_number = len(stock_ids)
    stock_columns = []
    if not debug_benchmark_only:
        stock_columns = [u.stockID(stock_ids[k]) for k in range(stock_number)]
    columns = ['date', benchmark_name] + stock_columns
    prices = u.createDataFrame(row_number, columns)
    prices['date'] = allprice['date']
    prices[benchmark_name] = allprice['close']
    for stock_column in stock_columns:
        prices[stock_column] = allprice['close_' + stock_column]
    debug_dump('Original Price', prices)

    # Normalize Price
    value_columns = columns[1:]
    for column in value_columns:
        prices[column] = normalize_price(prices[column])
    debug_dump('Normalized Price', prices)

    # Calculate Relative Price w.r.t. First Valid Price
    for column in value_columns:
        first_valid = next((row for row in range(row_number)
                            if not np.isnan(prices.ix[row, column])), -1)
        if first_valid == -1:
            continue  # Series has no valid price at all
        if debug_benchmark_only:
            print('Row =', first_valid)
        # Cache the reference first: its own cell is rewritten to one below.
        ref_price = prices.ix[first_valid, column]
        for row in range(first_valid, row_number):
            cur_price = prices.ix[row, column]
            if np.isnan(cur_price):
                continue
            prices.ix[row, column] = 1.0 + (cur_price - ref_price) / ref_price
    debug_dump('Relative Price', prices)

    # Plot Figure
    fig = plt.figure(figsize=(32, 18), dpi=72, facecolor="white")
    axes = plt.subplot(111)
    axes.cla()  # Clear Axes

    # Define Font
    font = {
        'family': 'serif',
        'color': 'black',
        'weight': 'normal',
        'size': 18,
    }

    # Plot Sub-figure 1
    plt.title('%s vs. %s' % (series_name, benchmark_name), fontdict=font)
    axes.set_xlabel('', fontdict=font)
    axes.set_ylabel('Ratio', fontdict=font)
    prices.plot(x='date',
                y=benchmark_name,
                ax=axes,
                color='grey',
                lw=2.0,
                ls='--')
    for stock_column in stock_columns:
        prices.plot(x='date', y=stock_column, ax=axes)

    # Common Format for Both Sub-figures
    axes.grid(True)
    fig.autofmt_xdate()
    fig.tight_layout()
    plt.setp(plt.gca().get_xticklabels(), rotation=30)
    plt.show()

    # Save Figure
    fig_key = 'fig_coef'
    name_parts = [
        postfix, series_name, 'vs', benchmark_name,
        u.dateToStr(u.today())
    ]
    fig_file = c.file_dict[fig_key] % '_'.join(name_parts)
    u.saveFigure(fig, c.path_dict[fig_key], fig_file)