def makeStkPrice(start_date: str, end_date: str, daily_universe=None) -> pd.DataFrame:
    '''
    Attach daily OHLC, volume, amount, adjustment factor and adjusted OHLC
    to the stock universe for a date range (both end points inclusive).

    Prices are read from ASHAREEODPRICES and left-joined onto the universe on
    (tradeday, ticker), so universe rows without a price record (the original
    author notes: suspended stocks/days) carry null values in the price columns.

    :param start_date: str, first date of the range, "YYYYMMDD"
    :param end_date: str, last date of the range, "YYYYMMDD"
    :param daily_universe: optional pd.DataFrame with at least the columns
        tradeday and ticker. When None, it is built by concatenating
        makeDailyUniverse(day) for every day of getTradeCalendar(start_date, end_date).
    :return: pd.DataFrame, the universe columns followed by
        [open, high, low, close, volume, amount, adjust_factor,
         adjust_open, adjust_high, adjust_low, adjust_close],
        with a fresh 0..n-1 index. A new frame is returned; the argument is
        not modified.
    '''
    # NOTE(review): the dates are interpolated unquoted into the SQL text, so
    # this relies on callers passing trusted "YYYYMMDD" digit strings.
    sql = '''
        SELECT
            TRADE_DT,
            S_INFO_WINDCODE,
            S_DQ_OPEN,
            S_DQ_HIGH,
            S_DQ_LOW,
            S_DQ_CLOSE,
            S_DQ_VOLUME,
            S_DQ_AMOUNT,
            S_DQ_ADJFACTOR,
            S_DQ_ADJOPEN,
            S_DQ_ADJHIGH,
            S_DQ_ADJLOW,
            S_DQ_ADJCLOSE
        FROM
            ASHAREEODPRICES
        WHERE
            TRADE_DT >= {0}
            AND TRADE_DT <= {1}
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        price = oracle.query(sql)
    # Rename positionally, in the same order as the SELECT list above.
    price.columns = [
        "tradeday", "ticker", "open", "high", "low", "close", "volume",
        "amount", "adjust_factor", "adjust_open", "adjust_high", "adjust_low",
        "adjust_close",
    ]

    if daily_universe is None:
        # DataFrame.append was removed in pandas 2.0 (and copied the frame on
        # every iteration); collect the per-day frames and concatenate once.
        frames = [makeDailyUniverse(day)
                  for day in getTradeCalendar(start_date, end_date)]
        if frames:
            daily_universe = pd.concat(frames)
        else:
            # No trading day in the range: keep the join keys so the merge
            # below yields an empty, correctly shaped result.
            daily_universe = pd.DataFrame(columns=["tradeday", "ticker"])

    merged = pd.merge(daily_universe, price, on=["tradeday", "ticker"], how="left")
    return merged.reset_index(drop=True)
def makeStkName(start_date: str, end_date: str, daily_universe=None) -> pd.DataFrame:
    '''
    Add each stock's name on each day of a date range (both end points
    inclusive) to the stock universe.

    Name records come from ASHAREPREVIOUSNAME; a record applies to a universe
    row when the tickers match and BEGINDATE <= tradeday <= ENDDATE (closed
    interval). A NULL ENDDATE is treated as end_date. If several records cover
    the same row, the one appearing last in the query result wins.

    NOTE(review): a second function named makeStkName is defined later in this
    file and rebinds the name, so this version is not reachable by name once
    the module has been loaded - confirm which one is intended.

    :param start_date: str, first date of the range, "YYYYMMDD"
    :param end_date: str, last date of the range, "YYYYMMDD"
    :param daily_universe: optional pd.DataFrame with at least the columns
        tradeday and ticker. When given it is modified in place (a name column
        is added/overwritten and the index is reset) and the same object is
        returned. When None, it is built from makeDailyUniverse for every day
        of getTradeCalendar(start_date, end_date).
    :return: pd.DataFrame, the universe with an extra column name (str, or
        None where no record applies)
    '''
    # NOTE(review): the dates are interpolated unquoted into the SQL text, so
    # this relies on callers passing trusted "YYYYMMDD" digit strings.
    sql = '''
        SELECT
            S_INFO_WINDCODE,
            S_INFO_NAME,
            BEGINDATE,
            ENDDATE
        FROM
            ASHAREPREVIOUSNAME
        WHERE
            BEGINDATE <= {1}
            AND (
                ENDDATE >= {0}
                OR ENDDATE IS NULL)
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        name_range = oracle.query(sql)

    if daily_universe is None:
        # DataFrame.append was removed in pandas 2.0 (and copied the frame on
        # every iteration); collect the per-day frames and concatenate once.
        frames = [makeDailyUniverse(day)
                  for day in getTradeCalendar(start_date, end_date)]
        if frames:
            daily_universe = pd.concat(frames)
        else:
            daily_universe = pd.DataFrame(columns=["tradeday", "ticker"])

    # An open-ended record (NULL ENDDATE) is valid through the end of the range.
    name_range['ENDDATE'] = np.where(name_range['ENDDATE'].isna(), end_date,
                                     name_range['ENDDATE'])

    names = np.full(len(daily_universe), None, dtype=object)
    tradeday_col = daily_universe['tradeday']
    # Positional row numbers per ticker, so each record is compared only with
    # its own ticker's rows instead of the whole universe.
    rows_by_ticker = daily_universe.groupby('ticker', sort=False).indices
    for ticker, begin_dt, end_dt, name in zip(name_range['S_INFO_WINDCODE'],
                                              name_range['BEGINDATE'],
                                              name_range['ENDDATE'],
                                              name_range['S_INFO_NAME']):
        rows = rows_by_ticker.get(ticker)
        if rows is None:
            continue
        days = tradeday_col.iloc[rows]
        covered = ((days >= begin_dt) & (days <= end_dt)).to_numpy()
        names[rows[covered]] = name

    daily_universe['name'] = names
    daily_universe.index = pd.RangeIndex(len(daily_universe))
    return daily_universe
def makeStkST(start_date: str, end_date: str, daily_universe=None) -> pd.DataFrame:
    '''
    Add each stock's ST flag and ST category on each day of a date range
    (both end points inclusive) to the stock universe.

    ST records come from ASHAREST. A record applies to a universe row when the
    tickers match and ENTRY_DT <= tradeday < REMOVE_DT: the interval is
    half-open, so the removal day itself is not ST. A NULL REMOVE_DT is
    replaced by "20991231". If several records cover the same row, the one
    appearing last in the query result wins.

    :param start_date: str, first date of the range, "YYYYMMDD"
    :param end_date: str, last date of the range, "YYYYMMDD"
    :param daily_universe: optional pd.DataFrame with at least the columns
        tradeday and ticker. When given it is modified in place (ST and
        ST_type columns are added/overwritten and the index is reset) and the
        same object is returned. When None, it is built from makeDailyUniverse
        for every day of getTradeCalendar(start_date, end_date).
    :return: pd.DataFrame, the universe with extra columns
        ST (bool) and ST_type (result of parse_ST_type, None when not ST)
    '''
    # NOTE(review): the dates are interpolated unquoted into the SQL text, so
    # this relies on callers passing trusted "YYYYMMDD" digit strings.
    # The strict ">" on REMOVE_DT is deliberate: [ENTRY_DT, REMOVE_DT) is
    # half-open. The inline SQL comment must stay on its own line, otherwise it
    # would comment out the "OR REMOVE_DT IS NULL" branch.
    sql = '''
        SELECT
            S_INFO_WINDCODE,
            S_TYPE_ST,
            ENTRY_DT,
            REMOVE_DT
        FROM
            ASHAREST
        WHERE
            ENTRY_DT <= {1}
            AND (
                REMOVE_DT > {0} --由于[ENTRY_DT, REMOVE_DT)左闭右开, 此处不能取等号
                OR REMOVE_DT IS NULL)
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        st = oracle.query(sql)
    st["ST_type"] = st["S_TYPE_ST"].apply(parse_ST_type)

    if daily_universe is None:
        # DataFrame.append was removed in pandas 2.0 (and copied the frame on
        # every iteration); collect the per-day frames and concatenate once.
        frames = [makeDailyUniverse(day)
                  for day in getTradeCalendar(start_date, end_date)]
        if frames:
            daily_universe = pd.concat(frames)
        else:
            daily_universe = pd.DataFrame(columns=["tradeday", "ticker"])

    # Because the right end point is exclusive, end_date cannot stand in for a
    # NULL REMOVE_DT (end_date itself would be dropped); use a far-future date.
    st['REMOVE_DT'] = np.where(st['REMOVE_DT'].isna(), "20991231", st['REMOVE_DT'])

    n_rows = len(daily_universe)
    is_st = np.zeros(n_rows, dtype=bool)
    st_type = np.full(n_rows, None, dtype=object)
    tradeday_col = daily_universe['tradeday']
    # Positional row numbers per ticker, so each record is compared only with
    # its own ticker's rows instead of the whole universe.
    rows_by_ticker = daily_universe.groupby('ticker', sort=False).indices
    for ticker, entry_dt, remove_dt, record_type in zip(st['S_INFO_WINDCODE'],
                                                        st['ENTRY_DT'],
                                                        st['REMOVE_DT'],
                                                        st['ST_type']):
        rows = rows_by_ticker.get(ticker)
        if rows is None:
            continue
        days = tradeday_col.iloc[rows]
        # No equality on the upper bound: the removal day is already non-ST.
        covered = ((days >= entry_dt) & (days < remove_dt)).to_numpy()
        hit = rows[covered]
        is_st[hit] = True
        st_type[hit] = record_type

    daily_universe['ST'] = is_st
    daily_universe['ST_type'] = st_type
    daily_universe.index = pd.RangeIndex(n_rows)
    return daily_universe
def makeStkEX(start_date: str, end_date: str, daily_universe=None) -> pd.DataFrame:
    '''
    Flag ex-right / ex-dividend events per stock and day for a date range
    (both end points inclusive).

    Events are read from AShareEXRightDividendRecord and left-joined onto the
    universe on (tradeday, ticker). The original author lists six possible
    event categories: dividend, share reform, additional issue, rights issue,
    share consolidation, and Null.

    :param start_date: str, first date of the range, "YYYYMMDD"
    :param end_date: str, last date of the range, "YYYYMMDD"
    :param daily_universe: optional pd.DataFrame with at least the columns
        tradeday and ticker. When None, it is built from makeDailyUniverse for
        every day of getTradeCalendar(start_date, end_date).
    :return: pd.DataFrame, the universe with extra columns
        ex_right_dividend (bool, True when an event record exists) and
        ex_type (null when there is no event), sorted by (tradeday, ticker)
        with a fresh 0..n-1 index. A new frame is returned; the argument is
        not modified.
    '''
    # NOTE(review): the dates are interpolated unquoted into the SQL text, so
    # this relies on callers passing trusted "YYYYMMDD" digit strings.
    sql = '''
        SELECT
            EX_DATE,
            S_INFO_WINDCODE,
            EX_TYPE
        FROM
            AShareEXRightDividendRecord
        WHERE
            EX_DATE >= {0}
            AND EX_DATE <= {1}
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        ex_records = oracle.query(sql)
    ex_records.columns = ["tradeday", "ticker", "ex_type"]
    # Constant marker column: after the left join it is True exactly on the
    # universe rows that matched an event record.
    ex_records["ex_right_dividend"] = True
    ex_records = ex_records[["tradeday", "ticker", "ex_right_dividend", "ex_type"]]

    if daily_universe is None:
        # DataFrame.append was removed in pandas 2.0 (and copied the frame on
        # every iteration); collect the per-day frames and concatenate once.
        frames = [makeDailyUniverse(day)
                  for day in getTradeCalendar(start_date, end_date)]
        if frames:
            daily_universe = pd.concat(frames)
        else:
            daily_universe = pd.DataFrame(columns=["tradeday", "ticker"])

    merged = pd.merge(daily_universe, ex_records, on=["tradeday", "ticker"], how="left")
    # The marker is either True or missing, so "not missing" is the flag.
    # Assign the column explicitly: df[col].fillna(..., inplace=True) acts on a
    # temporary object and does nothing under pandas Copy-on-Write.
    merged["ex_right_dividend"] = merged["ex_right_dividend"].notna()
    merged = merged.sort_values(by=["tradeday", "ticker"])
    return merged.reset_index(drop=True)
def makeStkName(start_date: str, end_date: str, daily_universe=None) -> pd.DataFrame:
    '''
    Return each stock's name on each trading day of a date range (both end
    points inclusive).

    Name records come from ASHAREPREVIOUSNAME; a record applies to a
    (tradeday, ticker) row when the tickers match and
    BEGINDATE <= tradeday <= ENDDATE (closed interval). A NULL ENDDATE is
    treated as end_date. If several records cover the same row, the one
    appearing last in the query result wins; if none does, name is None.

    NOTE: this definition shares its name with an earlier function in this
    file and replaces it when the module is loaded. The optional
    daily_universe parameter keeps calls written against that earlier
    signature working.

    :param start_date: str, first date of the range, "YYYYMMDD"
    :param end_date: str, last date of the range, "YYYYMMDD"
    :param daily_universe: optional pd.DataFrame with at least the columns
        tradeday and ticker. When given it is modified in place (a name column
        is added/overwritten and the index is reset) and the same object is
        returned. When None (the original behaviour of this function), the
        rows are every trading day from getTradeCalendar(start_date, end_date)
        crossed with every ticker found in the name records, ordered by day
        and then ticker.
    :return: pd.DataFrame; with daily_universe=None the columns are
        [tradeday, ticker, name]
    '''
    # NOTE(review): the dates are interpolated unquoted into the SQL text, so
    # this relies on callers passing trusted "YYYYMMDD" digit strings.
    sql = '''
        SELECT
            S_INFO_WINDCODE,
            S_INFO_NAME,
            BEGINDATE,
            ENDDATE
        FROM
            ASHAREPREVIOUSNAME
        WHERE
            BEGINDATE <= {1}
            AND (
                ENDDATE >= {0}
                OR ENDDATE IS NULL)
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        name_range = oracle.query(sql)

    # An open-ended record (NULL ENDDATE) is valid through the end of the
    # range. isna() catches both None and NaN; an "is None" test misses NaN.
    name_range['ENDDATE'] = np.where(name_range['ENDDATE'].isna(), end_date,
                                     name_range['ENDDATE'])

    if daily_universe is None:
        tickers = sorted(name_range['S_INFO_WINDCODE'].dropna().unique())
        tradedays = getTradeCalendar(start_date, end_date)
        # Build all (day, ticker) rows at once; growing a frame one row at a
        # time with .loc copies it on every insertion.
        daily_universe = pd.DataFrame(
            [(day, ticker) for day in tradedays for ticker in tickers],
            columns=["tradeday", "ticker"],
        )

    names = np.full(len(daily_universe), None, dtype=object)
    tradeday_col = daily_universe['tradeday']
    # Positional row numbers per ticker, so each record is compared only with
    # its own ticker's rows instead of the whole frame.
    rows_by_ticker = daily_universe.groupby('ticker', sort=False).indices
    for ticker, begin_dt, end_dt, name in zip(name_range['S_INFO_WINDCODE'],
                                              name_range['BEGINDATE'],
                                              name_range['ENDDATE'],
                                              name_range['S_INFO_NAME']):
        rows = rows_by_ticker.get(ticker)
        if rows is None:
            continue
        days = tradeday_col.iloc[rows]
        covered = ((days >= begin_dt) & (days <= end_dt)).to_numpy()
        names[rows[covered]] = name

    daily_universe['name'] = names
    daily_universe.index = pd.RangeIndex(len(daily_universe))
    return daily_universe
def makeStkConstituent(start_date: str, end_date: str, daily_universe=None) -> pd.DataFrame:
    '''
    Flag index membership per stock and day for a date range (both end points
    inclusive).

    Constituent rows are read from AINDEXCSI500WEIGHT and AIndexHS300Weight
    and left-joined onto the universe on (tradeday, ticker); a universe row is
    flagged when the corresponding table has a row for that day and ticker.

    :param start_date: str, first date of the range, "YYYYMMDD"
    :param end_date: str, last date of the range, "YYYYMMDD"
    :param daily_universe: optional pd.DataFrame with at least the columns
        tradeday and ticker. When None, it is built from makeDailyUniverse for
        every day of getTradeCalendar(start_date, end_date).
    :return: pd.DataFrame, the universe with extra columns
        CSI500 (bool) and HS300 (bool), sorted by (tradeday, ticker) with a
        fresh 0..n-1 index. A new frame is returned; the argument is not
        modified.
    '''
    # NOTE(review): the dates are interpolated unquoted into the SQL text, so
    # this relies on callers passing trusted "YYYYMMDD" digit strings.
    sql1 = '''
        SELECT
            TRADE_DT,
            S_CON_WINDCODE
        FROM
            AINDEXCSI500WEIGHT
        WHERE
            TRADE_DT >= {0}
            AND TRADE_DT <= {1}
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        CSI500 = oracle.query(sql1)

    sql2 = '''
        SELECT
            TRADE_DT,
            S_CON_WINDCODE
        FROM
            AIndexHS300Weight
        WHERE
            TRADE_DT >= {0}
            AND TRADE_DT <= {1}
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        HS300 = oracle.query(sql2)

    # Constant marker columns: after the left joins they are True exactly on
    # the universe rows that matched a constituent record.
    CSI500.columns = ["tradeday", "ticker"]
    CSI500["CSI500"] = True
    HS300.columns = ["tradeday", "ticker"]
    HS300["HS300"] = True

    if daily_universe is None:
        # DataFrame.append was removed in pandas 2.0 (and copied the frame on
        # every iteration); collect the per-day frames and concatenate once.
        frames = [makeDailyUniverse(day)
                  for day in getTradeCalendar(start_date, end_date)]
        if frames:
            daily_universe = pd.concat(frames)
        else:
            daily_universe = pd.DataFrame(columns=["tradeday", "ticker"])

    merged = pd.merge(daily_universe, CSI500, on=["tradeday", "ticker"], how="left")
    merged = pd.merge(merged, HS300, on=["tradeday", "ticker"], how="left")
    # Each marker is either True or missing, so "not missing" is the flag.
    # Assign the columns explicitly: df[col].fillna(..., inplace=True) acts on
    # a temporary object and does nothing under pandas Copy-on-Write.
    merged["CSI500"] = merged["CSI500"].notna()
    merged["HS300"] = merged["HS300"].notna()
    merged = merged.sort_values(by=["tradeday", "ticker"])
    return merged.reset_index(drop=True)
def _sw_industry_labels(daily_universe, industry_ranges, end_date):
    '''
    Return one industry name (or None) per row of daily_universe, taken from
    the record whose closed interval [ENTRY_DT, REMOVE_DT] covers the row's
    tradeday for the row's ticker.

    A NULL REMOVE_DT is treated as end_date. If several records cover the same
    row, the one appearing last in industry_ranges wins.
    '''
    last_valid_day = np.where(industry_ranges['REMOVE_DT'].isna(), end_date,
                              industry_ranges['REMOVE_DT'])
    labels = np.full(len(daily_universe), None, dtype=object)
    tradeday_col = daily_universe['tradeday']
    # Positional row numbers per ticker, so each record is compared only with
    # its own ticker's rows instead of the whole universe.
    rows_by_ticker = daily_universe.groupby('ticker', sort=False).indices
    for ticker, entry_dt, remove_dt, industry_name in zip(
            industry_ranges['S_INFO_WINDCODE'], industry_ranges['ENTRY_DT'],
            last_valid_day, industry_ranges['INDUSTRIESNAME']):
        rows = rows_by_ticker.get(ticker)
        if rows is None:
            continue
        days = tradeday_col.iloc[rows]
        covered = ((days >= entry_dt) & (days <= remove_dt)).to_numpy()
        labels[rows[covered]] = industry_name
    return labels


def makeStkSWIndustry(start_date: str, end_date: str, daily_universe=None) -> pd.DataFrame:
    '''
    Add each stock's SW (Shenwan) level-1 and level-2 industry name on each
    day of a date range (both end points inclusive) to the stock universe.

    Per the original author, [ENTRY_DT, REMOVE_DT] in AShareSWIndustriesClass
    is a closed interval, unlike the half-open interval in AShareST, so the
    matching follows the makeStkName logic (both end points inclusive, NULL
    REMOVE_DT treated as end_date).

    The level-1 name is found by joining the first 4 characters of
    SW_IND_CODE to ASHAREINDUSTRIESCODE rows with LEVELNUM = '2'; the level-2
    name uses the first 6 characters and LEVELNUM = '3'.

    :param start_date: str, first date of the range, "YYYYMMDD"
    :param end_date: str, last date of the range, "YYYYMMDD"
    :param daily_universe: optional pd.DataFrame with at least the columns
        tradeday and ticker. When given it is modified in place (the two
        industry columns are added/overwritten and the index is reset) and the
        same object is returned. When None, it is built from makeDailyUniverse
        for every day of getTradeCalendar(start_date, end_date).
    :return: pd.DataFrame, the universe with extra columns
        sw_industry_L1 (str or None) and sw_industry_L2 (str or None)
    '''
    # NOTE(review): the dates are interpolated unquoted into the SQL text, so
    # this relies on callers passing trusted "YYYYMMDD" digit strings.
    sql1 = '''
        SELECT
            a.S_INFO_WINDCODE,
            b.INDUSTRIESNAME,
            a.ENTRY_DT,
            a.REMOVE_DT
        FROM
            ASHARESWINDUSTRIESCLASS a,
            ASHAREINDUSTRIESCODE b
        WHERE
            substr( a.SW_IND_CODE, 1, 4 ) = substr( b.INDUSTRIESCODE, 1, 4 )
            AND b.LEVELNUM = '2'
            AND ENTRY_DT <= {1}
            AND (REMOVE_DT >= {0} OR REMOVE_DT IS NULL)
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        industry_L1 = oracle.query(sql1)

    sql2 = '''
        SELECT
            a.S_INFO_WINDCODE,
            b.INDUSTRIESNAME,
            a.ENTRY_DT,
            a.REMOVE_DT
        FROM
            ASHARESWINDUSTRIESCLASS a,
            ASHAREINDUSTRIESCODE b
        WHERE
            substr( a.SW_IND_CODE, 1, 6 ) = substr( b.INDUSTRIESCODE, 1, 6 )
            AND b.LEVELNUM = '3'
            AND ENTRY_DT <= {1}
            AND (REMOVE_DT >= {0} OR REMOVE_DT IS NULL)
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        industry_L2 = oracle.query(sql2)

    if daily_universe is None:
        # DataFrame.append was removed in pandas 2.0 (and copied the frame on
        # every iteration); collect the per-day frames and concatenate once.
        frames = [makeDailyUniverse(day)
                  for day in getTradeCalendar(start_date, end_date)]
        if frames:
            daily_universe = pd.concat(frames)
        else:
            daily_universe = pd.DataFrame(columns=["tradeday", "ticker"])

    daily_universe['sw_industry_L1'] = _sw_industry_labels(daily_universe, industry_L1, end_date)
    daily_universe['sw_industry_L2'] = _sw_industry_labels(daily_universe, industry_L2, end_date)
    daily_universe.index = pd.RangeIndex(len(daily_universe))
    return daily_universe
def makeStkSuspend(start_date: str, end_date: str, daily_universe=None) -> pd.DataFrame:
    '''
    Add each stock's trading-suspension status on each day of a date range
    (both end points inclusive) to the stock universe.

    Suspension records come from ASHARETRADINGSUSPENSION. A record applies to
    a universe row when the tickers match and
    S_DQ_SUSPENDDATE <= tradeday <= last suspended day (closed interval).
    The last suspended day is the suspension date itself when
    S_DQ_RESUMPDATE is NULL, otherwise the value get_last_suspend_list
    returns for the resumption date. If several records cover the same row,
    the one appearing last in the query result wins.

    :param start_date: str, first date of the range, "YYYYMMDD"
    :param end_date: str, last date of the range, "YYYYMMDD"
    :param daily_universe: optional pd.DataFrame with at least the columns
        tradeday and ticker. When given it is modified in place (the four
        suspension columns are added/overwritten and the index is reset) and
        the same object is returned. When None, it is built from
        makeDailyUniverse for every day of getTradeCalendar(start_date, end_date).
    :return: pd.DataFrame, the universe with extra columns
        suspend (bool, True when suspended),
        suspend_type (result of parse_suspend_type),
        suspend_reason_code and suspend_reason;
        the last three are None on rows that are not suspended
    '''
    # NOTE(review): the dates are interpolated unquoted into the SQL text, so
    # this relies on callers passing trusted "YYYYMMDD" digit strings.
    sql = '''
        SELECT
            S_INFO_WINDCODE,
            S_DQ_SUSPENDDATE,
            S_DQ_RESUMPDATE,
            S_DQ_SUSPENDTYPE,
            S_DQ_CHANGEREASONTYPE,
            S_DQ_CHANGEREASON
        FROM
            ASHARETRADINGSUSPENSION
        WHERE
            ( S_DQ_RESUMPDATE IS NULL AND S_DQ_SUSPENDDATE >= {0} AND S_DQ_SUSPENDDATE <= {1} )
            OR (S_DQ_RESUMPDATE IS NOT NULL AND S_DQ_SUSPENDDATE <= {1} AND S_DQ_RESUMPDATE >= {0} )
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        suspensions = oracle.query(sql)

    # Without a resumption date the record covers only its own suspension
    # date; otherwise the end of the interval is derived from the resumption
    # date by get_last_suspend_list.
    suspensions['last_suspend_date'] = np.where(
        suspensions['S_DQ_RESUMPDATE'].isna(),
        suspensions['S_DQ_SUSPENDDATE'],
        get_last_suspend_list(suspensions['S_DQ_RESUMPDATE']))
    suspensions['S_DQ_SUSPENDTYPE'] = suspensions['S_DQ_SUSPENDTYPE'].apply(parse_suspend_type)

    if daily_universe is None:
        # DataFrame.append was removed in pandas 2.0 (and copied the frame on
        # every iteration); collect the per-day frames and concatenate once.
        frames = [makeDailyUniverse(day)
                  for day in getTradeCalendar(start_date, end_date)]
        if frames:
            daily_universe = pd.concat(frames)
        else:
            daily_universe = pd.DataFrame(columns=["tradeday", "ticker"])

    n_rows = len(daily_universe)
    suspended = np.zeros(n_rows, dtype=bool)
    suspend_types = np.full(n_rows, None, dtype=object)
    reason_codes = np.full(n_rows, None, dtype=object)
    reasons = np.full(n_rows, None, dtype=object)
    tradeday_col = daily_universe['tradeday']
    # Positional row numbers per ticker, so each record is compared only with
    # its own ticker's rows instead of the whole universe.
    rows_by_ticker = daily_universe.groupby('ticker', sort=False).indices
    for ticker, first_day, last_day, suspend_type, reason_code, reason in zip(
            suspensions['S_INFO_WINDCODE'], suspensions['S_DQ_SUSPENDDATE'],
            suspensions['last_suspend_date'], suspensions['S_DQ_SUSPENDTYPE'],
            suspensions['S_DQ_CHANGEREASONTYPE'], suspensions['S_DQ_CHANGEREASON']):
        rows = rows_by_ticker.get(ticker)
        if rows is None:
            continue
        days = tradeday_col.iloc[rows]
        covered = ((days >= first_day) & (days <= last_day)).to_numpy()
        hit = rows[covered]
        suspended[hit] = True
        suspend_types[hit] = suspend_type
        reason_codes[hit] = reason_code
        reasons[hit] = reason

    daily_universe['suspend'] = suspended
    daily_universe['suspend_type'] = suspend_types
    daily_universe['suspend_reason_code'] = reason_codes
    daily_universe['suspend_reason'] = reasons
    daily_universe.index = pd.RangeIndex(n_rows)
    return daily_universe