def fileCompare(src_fullpath, tar_fullpath):
    """Compare two CSV files cell by cell and print every difference.

    Both files are loaded with ``u.read_csv``. Only the row count, the
    column count and the cell values are compared; column labels are not.
    Two missing values (NaN) in the same position count as a match.

    Args:
        src_fullpath: Path of the source CSV file.
        tar_fullpath: Path of the target CSV file.

    Returns:
        True if the shapes agree and every cell matches, False otherwise.

    Raises:
        SystemExit: If either file does not exist.
    """
    # Check File Existence (source first, then target)
    for label, fullpath in (('Source', src_fullpath), ('Target', tar_fullpath)):
        if not u.hasFile(fullpath):
            print('%s File %s Does Not Exist' % (label, fullpath))
            raise SystemExit

    # Load Data Files
    src = u.read_csv(src_fullpath)
    tar = u.read_csv(tar_fullpath)
    row_number = len(src)
    col_number = len(src.columns)

    matched = True
    print('File Compare Start: %s vs %s' % (src_fullpath, tar_fullpath))
    if row_number != len(tar):
        matched = False
        print('Row Number Un-matched')
    elif col_number != len(tar.columns):
        matched = False
        print('Col Number Un-matched')
    else:
        for i in range(row_number):
            for j in range(col_number):
                src_value = src.iloc[i, j]
                tar_value = tar.iloc[i, j]
                # NaN is the only value that differs from itself; a cell
                # that is missing in both files is not a real mismatch.
                both_missing = (src_value != src_value
                                and tar_value != tar_value)
                if src_value != tar_value and not both_missing:
                    matched = False
                    print('Element(%s,%s) Un-matched' % (i, j))
    print('File Compare End: %s' % ('Matched' if matched else 'Un-Matched'))

    # Return Result
    return matched
def extractCommodityPrice(code, column):
    """Split a commodity price table into one CSV file per value of ``column``.

    The table is loaded with ``loadCommodityPrice``, re-indexed by its
    '发布时间' column, and partitioned by the distinct values found in
    ``column``. Each non-empty partition is written through ``u.to_csv``
    under the name ``Market_<n>``, numbered from 1 in order of appearance.

    Args:
        code: Commodity code used to build the input and output file names.
        column: Name of the column whose distinct values define the markets.

    Returns:
        None. Returns early, after printing a message, when the commodity
        data file does not exist.
    """
    # Pre-requisite: the commodity data file must already be on disk
    fullpath = c.fullpath_dict['commodity'] % code
    if not u.hasFile(fullpath):
        print('Require File Exists:', fullpath)
        return

    # Load the price table
    prices = loadCommodityPrice(code)
    prices.set_index(u'发布时间', inplace=True)
    print(prices.head(10))

    # Distinct values of the requested column, in order of first appearance
    markets = prices[column].drop_duplicates()
    print('Market Number:', len(markets))
    print('Markets:', markets)

    for seq, market in enumerate(markets, start=1):
        print('Market %s: %s' % (seq, market))
        market_name = 'Market_%s' % seq
        market_rows = prices[prices[column].isin([market])]
        if u.isNoneOrEmpty(market_rows):
            # Nothing to save; the sequence number is still consumed
            continue
        u.to_csv(market_rows, c.path_dict['commodity'],
                 c.file_dict['commodity_m'] % (code, market_name))
def loadFinanceSummary(stock_id):
    """Load the finance summary CSV of one stock.

    Args:
        stock_id: Stock code substituted into ``c.fullpath_dict['finsum']``.

    Returns:
        Whatever ``u.read_csv`` returns for the file, or None (after
        printing a message) when the file does not exist.
    """
    finsum_fullpath = c.fullpath_dict['finsum'] % stock_id
    if u.hasFile(finsum_fullpath):
        return u.read_csv(finsum_fullpath)

    # Data file is not available
    print('Require Finance Summary of %s!' % finsum_fullpath)
    return None
def loadStockBasics():
    """Load the stock basics CSV found at ``c.fullpath_dict['basics']``.

    Returns:
        Whatever ``u.read_csv`` returns for the file, or None (after
        printing a message) when the file does not exist.
    """
    basics_fullpath = c.fullpath_dict['basics']
    if u.hasFile(basics_fullpath):
        return u.read_csv(basics_fullpath)

    # Data file is not available
    print('Require Stock Basics: %s!' % basics_fullpath)
    return None
def loadDailyHFQ(stock_id, is_index):
    """Load the LSHQ CSV file of a stock or index.

    Args:
        stock_id: Code passed to ``u.stockFileName``.
        is_index: Flag passed to ``u.stockFileName`` together with the code.

    Returns:
        Whatever ``u.read_csv`` returns for the file, or None (after
        printing a message) when the file does not exist.
    """
    file_name = u.stockFileName(stock_id, is_index)
    lshq_fullpath = c.fullpath_dict['lshq'] % file_name

    if u.hasFile(lshq_fullpath):
        return u.read_csv(lshq_fullpath)

    # Data file is not available
    print('Require LSHQ of %s!' % file_name)
    return None
def load_component(index_name):
    """Load the component CSV file of an index.

    Args:
        index_name: Index name substituted into ``c.file_dict['index_c']``.

    Returns:
        Whatever ``u.read_csv`` returns for the file, or None (after
        printing a message) when the file does not exist.
    """
    component_file = c.file_dict['index_c'] % index_name
    component_fullpath = c.path_dict['index'] + component_file

    if u.hasFile(component_fullpath):
        return u.read_csv(component_fullpath)

    # Data file is not available
    print('Require Index Component of %s!' % index_name)
    return None
def validDailyHFQ(stock_id, is_index, force_update):
    """Tell whether the LSHQ file of a stock or index can be reused.

    Args:
        stock_id: Code passed to ``u.stockFileName``.
        is_index: Flag passed to ``u.stockFileName`` together with the code.
        force_update: When equal to True, the file is never considered valid.

    Returns:
        False when ``force_update == True``; otherwise the result of
        ``u.hasFile`` for the LSHQ file path.
    """
    # The file system is only consulted when no update is forced
    return (False if force_update == True else
            u.hasFile(c.fullpath_dict['lshq'] % u.stockFileName(stock_id, is_index)))
def calc_hpe(stock_id, period, ratio):
    '''
    Purpose:
    --------
    Compute the historical P/E (or E/P) ratio period by period.
    Assumes the per-period forward-adjusted (QFQ) price data and the
    Finance Summary data have already been downloaded or computed and
    stored as CSV files.

    Parameters:
    --------
    stock_id : string, stock code e.g. 600036
    period   : string, sampling period, one of 'W', 'M', 'Q'
    ratio    : 'PE' for price / rolling EPS (pe_* columns),
               'EP' for rolling EPS / price (ep_* columns)

    Returns:
    --------
    DataFrame indexed by date, every value rounded to 2 decimals
        date         period end date (last day of the period) e.g. 2005-03-31
        high         period high price
        close        period close price
        low          period low price
        eps          earnings per share at period end (may be missing)
        eps_filled   period-end EPS inferred from neighbouring periods
        eps_rolling  annualised expected EPS derived from eps_filled
        pe_high      ratio based on the period high  (ep_high for 'EP')
        pe_close     ratio based on the period close (ep_close for 'EP')
        pe_low       ratio based on the period low   (ep_low for 'EP')

    Raises:
    --------
    SystemExit if a parameter is invalid, a required data file is
    missing, or a loaded data file is empty.
    '''
    # Check Input Parameters
    if not isinstance(stock_id, str) or not isinstance(period, str):
        print('Incorrect type of one or more input parameters!')
        raise SystemExit

    # Check Period
    period_types = ['W', 'M', 'Q']
    if period not in period_types:
        print('Un-supported period type - should be one of:', period_types)
        raise SystemExit

    # Check Ratio
    ratio_types = ['PE', 'EP']
    if ratio not in ratio_types:
        print('Un-supported ratio type - should be one of:', ratio_types)
        raise SystemExit

    # Ensure Stock QFQ Data File is Available
    qfq_path = c.path_dict['qfq'] % period
    qfq_file = c.file_dict['qfq'] % (period, stock_id)
    qfq_fullpath = qfq_path + qfq_file
    if not u.hasFile(qfq_fullpath):
        print('Require stock QFQ file:', (qfq_fullpath))
        raise SystemExit

    # Ensure Stock Finance Summary Data File is Available
    fs_fullpath = c.fullpath_dict['finsum'] % stock_id
    if not u.hasFile(fs_fullpath):
        print('Require stock finance summary file:', (fs_fullpath))
        raise SystemExit

    #
    # Load QFQ Data
    #
    qfq = u.read_csv(qfq_fullpath)
    qfq.set_index('date', inplace=True)
    qfq.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(qfq.head(10))

    # Check empty QFQ data
    if len(qfq) == 0:
        print('Stock QFQ data length is 0!')
        raise SystemExit

    # Handle stop-trading periods (by filling with previous period data)
    _hpe_fill_suspended_periods(qfq)

    #
    # Load Finance Summary Data
    #
    fs = u.read_csv(fs_fullpath)
    fs.set_index('date', inplace=True)
    fs.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(fs.head(10))

    # Check empty Finance Summary data
    if len(fs) == 0:
        print('Stock finance summary data length is 0!')
        raise SystemExit

    #
    # Generate Rolling EPS for Each Quarter
    #
    year_start = u.dateFromStr(qfq.index[0]).year   # First element
    year_end = u.dateFromStr(qfq.index[-1]).year    # Last element
    eps = _hpe_build_eps_table(fs, year_start, year_end)
    if gs.is_debug:
        print(eps.head(10))

    #
    # Calculate HPE based on given period
    #
    # Drop un-used columns
    hpe = qfq.drop(['open', 'volume', 'amount'], axis=1)

    # Map every period end date ('YYYY-mm-dd') to its quarter end date
    quarter_keys = []
    for index in hpe.index:
        index_date = u.dateFromStr(index)
        quarter_keys.append(
            u.quarterDateStr(index_date.year, u.quarterOfDate(index_date)))

    # Attach the EPS figures of the matching quarter to every period
    for column in ['eps', 'eps_filled', 'eps_rolling']:
        hpe[column] = eps.loc[quarter_keys, column].values

    # Calculate Historical P/E or E/P Ratio
    for price in ['high', 'close', 'low']:
        if ratio == 'PE':
            hpe['pe_' + price] = hpe[price] / hpe['eps_rolling']
        else:
            hpe['ep_' + price] = hpe['eps_rolling'] / hpe[price]

    # Format columns: keep two decimals
    for column in hpe.columns:
        hpe[column] = hpe[column].map(lambda x: '%.2f' % x).astype(float)

    return hpe


def _hpe_fill_suspended_periods(qfq):
    '''Copy the previous row into rows whose close is NaN (in place).

    Assumes qfq is sorted ascendingly by date. Leading rows without a
    close price have nothing to copy from and are left untouched.
    '''
    close_pos = qfq.columns.get_loc('close')
    column_number = len(qfq.columns)
    for i in range(1, len(qfq)):
        if not np.isnan(qfq.iloc[i, close_pos]):
            continue
        if gs.is_debug:
            print('close = ', qfq.iloc[i, close_pos])
        if np.isnan(qfq.iloc[i - 1, close_pos]):
            # Ignore leading stop-trading periods
            continue
        # Write through qfq.iloc[row, col]: assigning via qfq.iloc[i][col]
        # only updates a temporary Series and can be silently lost.
        for pos in range(column_number):
            qfq.iloc[i, pos] = qfq.iloc[i - 1, pos]


def _hpe_build_eps_table(fs, year_start, year_end):
    '''Build the quarterly eps / eps_filled / eps_rolling table.

    The table has one row per quarter of every year from year_start to
    year_end (inclusive), indexed by u.quarterDateStr(year, quarter).
    '''
    years = range(year_start, year_end + 1)
    quarters = range(1, 5)

    eps_index = []
    for year in years:
        for quarter in quarters:
            eps_index.append(u.quarterDateStr(year, quarter))
    if gs.is_debug:
        print(eps_index)

    # All elements start as NaN
    eps = pd.DataFrame(np.nan, index=eps_index,
                       columns=['eps', 'eps_filled', 'eps_rolling'])

    # Inherit EPS from finance summary; quarters without data stay NaN
    for index in eps_index:
        if index in fs.index:
            eps.loc[index, 'eps'] = fs.loc[index, 'eps']

    # For a missing quarter: (source quarter, factor) candidates in priority
    # order. The factors scale one quarter's figure to another in proportion
    # to the quarter number.
    fill_rules = {
        1: ((2, 0.5), (3, 0.3333333333333333), (4, 0.25)),
        2: ((1, 2.0), (3, 0.6666666666666667), (4, 0.5)),
        3: ((2, 1.5), (1, 3.0), (4, 0.75)),
        4: ((3, 1.333333333333333), (2, 2.0), (1, 4.0)),
    }
    # Factor turning the figure of quarter N into a full-year estimate
    rolling_ratio = [4.0, 2.0, 1.333333333333333, 1.0]

    for year in years:
        index_of = {}
        reported = {}
        for quarter in quarters:
            index_of[quarter] = u.quarterDateStr(year, quarter)
            reported[quarter] = eps.loc[index_of[quarter], 'eps']
        if gs.is_debug:
            print('eps_q1 =', reported[1], 'eps_q2 =', reported[2],
                  'eps_q3 =', reported[3], 'eps_q4 =', reported[4])

        # Fill the Missing EPS Data from the reported (unfilled) quarters
        filled = dict(reported)
        for quarter in quarters:
            if not np.isnan(reported[quarter]):
                continue
            for source, factor in fill_rules[quarter]:
                if not np.isnan(reported[source]):
                    filled[quarter] = reported[source] * factor
                    break
        if gs.is_debug:
            print('eps_q1_filled =', filled[1], 'eps_q2_filled =', filled[2],
                  'eps_q3_filled =', filled[3], 'eps_q4_filled =', filled[4])

        # Store filled EPS and Calculate Rolling EPS
        for quarter in quarters:
            index = index_of[quarter]
            eps.loc[index, 'eps_filled'] = filled[quarter]
            eps.loc[index, 'eps_rolling'] = (
                filled[quarter] * rolling_ratio[quarter - 1])

    return eps
def plot_HPE(stock_id, period, ratio):
    """Plot the close-based historical P/E (or E/P) ratio above the close price.

    The HPE data is loaded with ``loadHPE``. The figure is shown with
    ``plt.show()`` and then handed to ``u.saveFigure``.

    Args:
        stock_id: Stock code (string).
        period: Sampling period, one of 'W', 'M', 'Q'.
        ratio: 'PE' to plot the ``pe_close`` column, 'EP' to plot
            ``ep_close``.

    Raises:
        SystemExit: If a parameter is invalid or the HPE data file does
            not exist.
    """
    # Check Input Parameters
    if not isinstance(stock_id, str) or not isinstance(period, str):
        print('Incorrect type of one or more input parameters!')
        raise SystemExit

    # Check Period
    period_types = ['W', 'M', 'Q']
    if period not in period_types:
        print('Un-supported period type - should be one of:', period_types)
        raise SystemExit

    # Check Ratio
    ratio_types = ['PE', 'EP']
    if ratio not in ratio_types:
        print('Un-supported ratio type - should be one of:', ratio_types)
        raise SystemExit

    # Check Pre-requisite: HPE File
    key = 'hpe' if ratio == 'PE' else 'hep'
    hpe_path = c.path_dict[key] % period
    hpe_file = c.file_dict[key] % (period, stock_id)
    hpe_fullpath = hpe_path + hpe_file
    if not u.hasFile(hpe_fullpath):
        print('Require File Exists:', hpe_fullpath)
        raise SystemExit

    # Load Data File
    hpe = loadHPE(stock_id=stock_id, period=period, ratio=ratio)

    # Plot Figure: two stacked sub-figures sharing the x axis
    fig, (ax1, ax2) = plt.subplots(2, sharex=True)
    fig.set_size_inches(32, 18)

    # Plot Sub-figure 1: the ratio (close-based series only)
    ratio_name = 'P/E Ratio' if ratio == 'PE' else 'E/P Ratio'
    # ratio_name already ends with 'Ratio'; do not append the word again
    ax1.set_title('%s of Stock %s' % (ratio_name, stock_id), fontsize=14)
    ax1.set_xlabel('')
    ax1.set_ylabel(ratio_name)
    ratio_column = 'pe_close' if ratio == 'PE' else 'ep_close'
    hpe.plot(x='date', y=ratio_column, ax=ax1)

    # Plot Sub-figure 2: the close price
    ax2.set_title('Price of Stock %s' % stock_id, fontsize=14)
    ax2.set_xlabel('')
    ax2.set_ylabel('Price')
    hpe.plot(x='date', y='close', ax=ax2)

    # Common Format for Both Sub-figures
    for ax in [ax1, ax2]:
        ax.grid(True)
    fig.autofmt_xdate()
    fig.tight_layout()
    plt.setp(plt.gca().get_xticklabels(), rotation=30)
    plt.show()

    # Save Figure
    fig_key = 'fig_hpe' if ratio == 'PE' else 'fig_hep'
    fig_path = c.path_dict[fig_key] % period
    fig_file = c.file_dict[fig_key] % (period, stock_id)
    u.saveFigure(fig, fig_path, fig_file)
def validFinanceSummary(stock_id, force_update):
    """Tell whether the finance summary file of a stock can be reused.

    Args:
        stock_id: Stock code substituted into ``c.fullpath_dict['finsum']``.
        force_update: When equal to True, the file is never considered valid.

    Returns:
        False when ``force_update == True``; otherwise the result of
        ``u.hasFile`` for the finance summary file path.
    """
    # The file system is only consulted when no update is forced
    return (False if force_update == True else
            u.hasFile(c.fullpath_dict['finsum'] % stock_id))
def validStockBasics(force_update):
    """Tell whether the stock basics file can be reused.

    Args:
        force_update: When equal to True, the file is never considered valid.

    Returns:
        False when ``force_update == True``; otherwise the result of
        ``u.hasFile`` for ``c.fullpath_dict['basics']``.
    """
    # The file system is only consulted when no update is forced
    return (False if force_update == True else
            u.hasFile(c.fullpath_dict['basics']))
def calc_qfq(stock_id, period):
    '''
    Purpose:
    --------
    Compute historical forward-adjusted (QFQ) price data period by
    period, for use in the historical P/E calculation.
    Assumes the historical quotes (LSHQ) have already been downloaded
    and stored as a CSV file.

    Parameters:
    --------
    stock_id : string, stock code e.g. 600036
    period   : string, resampling period, one of 'W', 'M', 'Q'

    Returns:
    --------
    DataFrame indexed by date
        date    period end date e.g. 2005-03-31
        open    period open price
        high    period high price
        close   period close price
        low     period low price
        volume  period trading volume
        amount  period trading amount

    Raises:
    --------
    SystemExit if a parameter is invalid or the LSHQ file is missing.
    '''
    # Check Input Parameters
    if not isinstance(stock_id, str) or not isinstance(period, str):
        print('Incorrect type of one or more input parameters!')
        raise SystemExit

    # Check Period
    period_types = ['W', 'M', 'Q']
    if period not in period_types:
        print('Un-supported period type - should be one of:', period_types)
        raise SystemExit

    # Ensure Stock LSHQ Data File is Available
    lshq_fullpath = c.fullpath_dict['lshq'] % stock_id
    if not u.hasFile(lshq_fullpath):
        print('Require LSHQ of Stock %s!' % stock_id)
        raise SystemExit

    df = u.read_csv(lshq_fullpath)
    # An explicit unit is required: recent pandas releases reject the
    # unit-less np.datetime64 as a cast target.
    df['date'] = df['date'].astype('datetime64[ns]')
    df.set_index('date', inplace=True)
    df.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(df.head(10))

    # Resample: any column not overridden below keeps the first value of
    # its period.
    # NOTE(review): recent pandas releases deprecate the 'M'/'Q' aliases
    # in favour of 'ME'/'QE' - confirm against the pandas version in use.
    df_resample = df.resample(period).first()
    df_resample['open'] = df['open'].resample(period).first()
    df_resample['high'] = df['high'].resample(period).max()
    df_resample['close'] = df['close'].resample(period).last()
    df_resample['low'] = df['low'].resample(period).min()
    df_resample['volume'] = df['volume'].resample(period).sum()
    df_resample['amount'] = df['amount'].resample(period).sum()
    df_resample['factor'] = df['factor'].resample(period).last()

    # Divide the prices by the factor of the period's last row. Whole-column
    # assignment is used because df_resample.iloc[i][column] = ... only
    # updates a temporary row and can be silently lost. Volume and amount
    # are not adjusted.
    for column in ['open', 'high', 'close', 'low']:
        df_resample[column] = df_resample[column] / df_resample['factor']

    df_resample.drop('factor', axis=1, inplace=True)

    # Return Resampled Dataframe
    return df_resample
def validCommodityPrice(code, force_update):
    """Tell whether the price file of a commodity can be reused.

    Args:
        code: Commodity code substituted into ``c.fullpath_dict['commodity']``.
        force_update: When equal to True, the file is never considered valid.

    Returns:
        False when ``force_update == True``; otherwise the result of
        ``u.hasFile`` for the commodity file path.
    """
    # The file system is only consulted when no update is forced
    return (False if force_update == True else
            u.hasFile(c.fullpath_dict['commodity'] % code))