Exemple #1
0
def makeStkPrice(start_date: str,
                 end_date: str,
                 daily_universe=None) -> pd.DataFrame:
    '''
    Attach daily price data to the stock universe for a date range (both endpoints inclusive).

    Queries OHLC, volume, turnover amount, adjustment factor and adjusted OHLC from
    ASHAREEODPRICES and left-merges them onto the universe on (tradeday, ticker).
    Universe rows without a matching price record keep null values in the price columns
    (per the original author's note, this is what happens for suspended stocks/dates).

    :param start_date: str, first date of the range, "YYYYMMDD"
    :param end_date: str, last date of the range, "YYYYMMDD"
    :param daily_universe: optional pd.DataFrame with at least the columns "tradeday" and
            "ticker". When None, it is built by calling makeDailyUniverse for every day
            returned by getTradeCalendar(start_date, end_date).
    :return: pd.DataFrame, the universe columns plus [open, high, low, close, volume, amount,
            adjust_factor, adjust_open, adjust_high, adjust_low, adjust_close],
            indexed 0..n-1
    '''

    sql = \
        '''
        SELECT
            TRADE_DT,
            S_INFO_WINDCODE,
            S_DQ_OPEN,
            S_DQ_HIGH,
            S_DQ_LOW,
            S_DQ_CLOSE, 
            S_DQ_VOLUME,
            S_DQ_AMOUNT, 
            S_DQ_ADJFACTOR,
            S_DQ_ADJOPEN,
            S_DQ_ADJHIGH,
            S_DQ_ADJLOW, 
            S_DQ_ADJCLOSE
        FROM
            ASHAREEODPRICES 
        WHERE
            TRADE_DT >= {0}
            AND TRADE_DT <= {1}
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        price = oracle.query(sql)
    # Columns are renamed positionally, so this list must stay in SELECT order.
    price.columns = [
        "tradeday", "ticker", "open", "high", "low", "close", "volume",
        "amount", "adjust_factor", "adjust_open", "adjust_high", "adjust_low",
        "adjust_close"
    ]

    if daily_universe is None:
        # Build all per-day frames first and concatenate once: DataFrame.append
        # re-copied the accumulated frame on every call and no longer exists in pandas 2.
        frames = [
            makeDailyUniverse(tradeday)
            for tradeday in getTradeCalendar(start_date, end_date)
        ]
        # pd.concat rejects an empty list, so fall back to an empty universe
        # that still carries the two merge keys.
        daily_universe = (pd.concat(frames) if frames else
                          pd.DataFrame(columns=["tradeday", "ticker"]))

    merged = pd.merge(daily_universe,
                      price,
                      on=["tradeday", "ticker"],
                      how="left")
    return merged.reset_index(drop=True)
Exemple #2
0
def makeStkName(start_date: str,
                end_date: str,
                daily_universe=None) -> pd.DataFrame:
    '''
    Label every (tradeday, ticker) row of the universe with the stock name valid on that day,
    for a date range with both endpoints inclusive.

    Name history comes from ASHAREPREVIOUSNAME, where each record is valid on the closed
    interval [BEGINDATE, ENDDATE]; a null ENDDATE is treated as end_date. Records are applied
    in query order, so when intervals overlap the later record wins. Rows not covered by any
    record keep name = None.

    :param start_date: str, first date of the range, "YYYYMMDD"
    :param end_date: str, last date of the range, "YYYYMMDD"
    :param daily_universe: optional pd.DataFrame with at least the columns "tradeday" and
            "ticker". When None, it is built by calling makeDailyUniverse for every day
            returned by getTradeCalendar(start_date, end_date). The frame passed in is not
            modified; a new frame is returned.
    :return: pd.DataFrame, the universe columns plus [name - str or None], indexed 0..n-1
    '''

    sql = \
        '''
        SELECT
            S_INFO_WINDCODE,
            S_INFO_NAME,
            BEGINDATE,
            ENDDATE 
        FROM
            ASHAREPREVIOUSNAME 
        WHERE
            BEGINDATE <= {1}
            AND (
            ENDDATE >= {0} 
            OR ENDDATE IS NULL)
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        name_range = oracle.query(sql)

    if daily_universe is None:
        # Concatenate once instead of DataFrame.append in a loop (quadratic, and
        # removed in pandas 2).
        frames = [
            makeDailyUniverse(tradeday)
            for tradeday in getTradeCalendar(start_date, end_date)
        ]
        # pd.concat rejects an empty list; keep the key columns on the empty fallback.
        daily_universe = (pd.concat(frames) if frames else
                          pd.DataFrame(columns=["tradeday", "ticker"]))

    # An open-ended record is still valid at the end of the queried range.
    name_range['ENDDATE'] = np.where(name_range['ENDDATE'].isna(), end_date,
                                     name_range['ENDDATE'])

    # reset_index returns a new frame, so the caller's frame is left untouched, and the
    # fresh 0..n-1 index lets index labels double as positions into `names` below.
    daily_universe = daily_universe.reset_index(drop=True)
    tradedays_by_ticker = {
        ticker: days
        for ticker, days in daily_universe.groupby('ticker')['tradeday']
    }
    names = np.full(len(daily_universe), None, dtype=object)

    for record in name_range.itertuples(index=False):
        # Only look at the rows of this record's ticker instead of rescanning the universe.
        days = tradedays_by_ticker.get(record.S_INFO_WINDCODE)
        if days is None:
            continue
        in_range = (days >= record.BEGINDATE) & (days <= record.ENDDATE)
        rows = days.index.to_numpy()[np.asarray(in_range, dtype=bool)]
        names[rows] = record.S_INFO_NAME

    daily_universe['name'] = names
    return daily_universe
Exemple #3
0
def makeStkST(start_date: str, end_date: str, daily_universe=None) -> pd.DataFrame:
    '''
    Flag every (tradeday, ticker) row of the universe with its ST status and ST category,
    for a date range with both endpoints inclusive.

    ST history comes from ASHAREST, where each record covers the half-open interval
    [ENTRY_DT, REMOVE_DT): the removal day itself is NOT an ST day. Records are applied in
    query order, so when intervals overlap the later record wins. Rows not covered by any
    record keep ST = False and ST_type = None.

    :param start_date: str, first date of the range, "YYYYMMDD"
    :param end_date: str, last date of the range, "YYYYMMDD"
    :param daily_universe: optional pd.DataFrame with at least the columns "tradeday" and
            "ticker". When None, it is built by calling makeDailyUniverse for every day
            returned by getTradeCalendar(start_date, end_date). The frame passed in is not
            modified; a new frame is returned.
    :return: pd.DataFrame, the universe columns plus [ST - bool, ST_type - value returned by
            parse_ST_type, or None], indexed 0..n-1
    '''

    sql = \
        '''
        SELECT
            S_INFO_WINDCODE,
            S_TYPE_ST,
            ENTRY_DT,
            REMOVE_DT 
        FROM
            ASHAREST
        WHERE
            ENTRY_DT <= {1}
            AND (
            REMOVE_DT > {0}   --由于[ENTRY_DT, REMOVE_DT)左闭右开, 此处不能取等号
            OR REMOVE_DT IS NULL)
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        st = oracle.query(sql)
    st["ST_type"] = st["S_TYPE_ST"].apply(parse_ST_type)
    if daily_universe is None:
        # Concatenate once instead of DataFrame.append in a loop (quadratic, and
        # removed in pandas 2).
        frames = [
            makeDailyUniverse(tradeday)
            for tradeday in getTradeCalendar(start_date, end_date)
        ]
        # pd.concat rejects an empty list; keep the key columns on the empty fallback.
        daily_universe = (pd.concat(frames) if frames else
                          pd.DataFrame(columns=["tradeday", "ticker"]))

    # The interval is right-open, so end_date cannot stand in for a null REMOVE_DT
    # (end_date itself would be excluded); use a far-future sentinel instead.
    st['REMOVE_DT'] = np.where(st['REMOVE_DT'].isna(), "20991231", st['REMOVE_DT'])

    # reset_index returns a new frame, so the caller's frame is left untouched, and the
    # fresh 0..n-1 index lets index labels double as positions into the arrays below.
    daily_universe = daily_universe.reset_index(drop=True)
    tradedays_by_ticker = {
        ticker: days
        for ticker, days in daily_universe.groupby('ticker')['tradeday']
    }
    is_st = np.zeros(len(daily_universe), dtype=bool)
    st_types = np.full(len(daily_universe), None, dtype=object)

    for record in st.itertuples(index=False):
        # Only look at the rows of this record's ticker instead of rescanning the universe.
        days = tradedays_by_ticker.get(record.S_INFO_WINDCODE)
        if days is None:
            continue
        # Strict '<' on the right end: REMOVE_DT is exclusive.
        in_range = (days >= record.ENTRY_DT) & (days < record.REMOVE_DT)
        rows = days.index.to_numpy()[np.asarray(in_range, dtype=bool)]
        is_st[rows] = True
        st_types[rows] = record.ST_type

    daily_universe['ST'] = is_st
    daily_universe['ST_type'] = st_types
    return daily_universe
Exemple #4
0
def makeStkEX(start_date: str,
              end_date: str,
              daily_universe=None) -> pd.DataFrame:
    '''
    Flag every (tradeday, ticker) row of the universe with its ex-right / ex-dividend status,
    for a date range with both endpoints inclusive.

    Events come from AShareEXRightDividendRecord and are left-merged onto the universe on
    (tradeday, ticker). Per the original author's note, EX_TYPE takes one of six values:
    dividend, share reform, additional issuance, rights issue, share consolidation, or null.
    NOTE(review): if the table holds several events for one ticker on one day, the left merge
    yields one output row per event - confirm whether that is intended.

    :param start_date: str, first date of the range, "YYYYMMDD"
    :param end_date: str, last date of the range, "YYYYMMDD"
    :param daily_universe: optional pd.DataFrame with at least the columns "tradeday" and
            "ticker". When None, it is built by calling makeDailyUniverse for every day
            returned by getTradeCalendar(start_date, end_date).
    :return: pd.DataFrame, the universe columns plus [ex_right_dividend - bool, ex_type - str],
            sorted by (tradeday, ticker) and indexed 0..n-1
    '''

    sql = \
        '''
        SELECT
            EX_DATE,
            S_INFO_WINDCODE,
            EX_TYPE 
        FROM
            AShareEXRightDividendRecord
        WHERE
            EX_DATE >= {0}
            AND EX_DATE <= {1}
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        events = oracle.query(sql)
    # Columns are renamed positionally, so this list must stay in SELECT order.
    events.columns = ["tradeday", "ticker", "ex_type"]
    events["ex_right_dividend"] = True
    events = events[["tradeday", "ticker", "ex_right_dividend", "ex_type"]]

    if daily_universe is None:
        # Concatenate once instead of DataFrame.append in a loop (quadratic, and
        # removed in pandas 2).
        frames = [
            makeDailyUniverse(tradeday)
            for tradeday in getTradeCalendar(start_date, end_date)
        ]
        # pd.concat rejects an empty list; keep the key columns on the empty fallback.
        daily_universe = (pd.concat(frames) if frames else
                          pd.DataFrame(columns=["tradeday", "ticker"]))

    merged = pd.merge(daily_universe,
                      events,
                      on=["tradeday", "ticker"],
                      how="left")
    # Assign the filled column back: df[col].fillna(..., inplace=True) works on a temporary
    # under pandas Copy-on-Write and leaves the frame unchanged. astype(bool) pins the dtype,
    # which otherwise depends on version-specific downcasting of the object column.
    merged["ex_right_dividend"] = merged["ex_right_dividend"].fillna(False).astype(bool)
    merged = merged.sort_values(by=["tradeday", "ticker"])
    return merged.reset_index(drop=True)
Exemple #5
0
def makeStkName(start_date: str, end_date: str) -> pd.DataFrame:
    '''
    Build a (tradeday, ticker, name) table holding the stock name valid on every trading day
    of a date range (both endpoints inclusive).

    Name history comes from ASHAREPREVIOUSNAME, where each record is valid on the closed
    interval [BEGINDATE, ENDDATE]; a null ENDDATE is treated as end_date. One row is produced
    for every combination of trading day and ticker returned by the query. If no record covers
    a given day the name is None; if several do, the last one in query order is used.
    Progress is printed to stdout after each trading day.

    :param start_date: str, first date of the range, "YYYYMMDD"
    :param end_date: str, last date of the range, "YYYYMMDD"
    :return: pd.DataFrame, columns = [tradeday, ticker, name], indexed 0..n-1
    '''

    sql = \
        '''
        SELECT
            S_INFO_WINDCODE,
            S_INFO_NAME,
            BEGINDATE,
            ENDDATE 
        FROM
            ASHAREPREVIOUSNAME 
        WHERE
            BEGINDATE <= {1}
            AND (
            ENDDATE >= {0} 
            OR ENDDATE IS NULL)
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        name_range = oracle.query(sql)

    # Treat an open-ended record as valid through end_date. notna() covers both None and NaN,
    # whichever the driver uses for SQL NULL.
    name_range["ENDDATE"] = name_range["ENDDATE"].where(
        name_range["ENDDATE"].notna(), end_date)

    # Split the history once per ticker (groupby yields tickers in sorted order) so the
    # inner loop filters a handful of records instead of the whole table.
    history_by_ticker = {
        ticker: history
        for ticker, history in name_range.groupby("S_INFO_WINDCODE")
    }
    tradedate_list = getTradeCalendar(start_date, end_date)
    total = len(tradedate_list) * len(history_by_ticker)

    # Collect plain rows and build the frame once; growing a DataFrame with .loc[i] = ...
    # re-allocates on every insert.
    rows = []
    for date in tradedate_list:
        for ticker, history in history_by_ticker.items():
            valid = history[(history["BEGINDATE"] <= date)
                            & (history["ENDDATE"] >= date)]
            # Zero or several matches are both possible; squeeze() only yields a scalar
            # name when exactly one record matches.
            name = valid["S_INFO_NAME"].iloc[-1] if len(valid) else None
            rows.append([date, ticker, name])
        if total:  # no tickers -> nothing to report, and avoids dividing by zero
            print("\r完成" + str(round(100 * len(rows) / total, 1)) + '%', end='')
    return pd.DataFrame(rows, columns=["tradeday", "ticker", "name"])
Exemple #6
0
def makeStkConstituent(start_date: str,
                       end_date: str,
                       daily_universe=None) -> pd.DataFrame:
    '''
    Flag every (tradeday, ticker) row of the universe with CSI 500 and HS 300 index
    membership, for a date range with both endpoints inclusive.

    Membership is read from AINDEXCSI500WEIGHT and AIndexHS300Weight: a stock counts as a
    constituent on a day when the weight table has a row for that (TRADE_DT, S_CON_WINDCODE).
    Both tables are left-merged onto the universe on (tradeday, ticker).

    :param start_date: str, first date of the range, "YYYYMMDD"
    :param end_date: str, last date of the range, "YYYYMMDD"
    :param daily_universe: optional pd.DataFrame with at least the columns "tradeday" and
            "ticker". When None, it is built by calling makeDailyUniverse for every day
            returned by getTradeCalendar(start_date, end_date).
    :return: pd.DataFrame, the universe columns plus [CSI500 - bool, HS300 - bool],
            sorted by (tradeday, ticker) and indexed 0..n-1
    '''

    sql1 = \
        '''
        SELECT
            TRADE_DT,
            S_CON_WINDCODE 
        FROM
            AINDEXCSI500WEIGHT
        WHERE
            TRADE_DT >= {0}
            AND TRADE_DT <= {1}
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        csi500 = oracle.query(sql1)

    sql2 = \
        '''
        SELECT
            TRADE_DT,
            S_CON_WINDCODE 
        FROM
            AIndexHS300Weight
        WHERE
            TRADE_DT >= {0}
            AND TRADE_DT <= {1}
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        hs300 = oracle.query(sql2)

    # Columns are renamed positionally (SELECT order); the constant True column becomes the
    # membership flag after the left merge.
    csi500.columns = ["tradeday", "ticker"]
    csi500["CSI500"] = True
    hs300.columns = ["tradeday", "ticker"]
    hs300["HS300"] = True

    if daily_universe is None:
        # Concatenate once instead of DataFrame.append in a loop (quadratic, and
        # removed in pandas 2).
        frames = [
            makeDailyUniverse(tradeday)
            for tradeday in getTradeCalendar(start_date, end_date)
        ]
        # pd.concat rejects an empty list; keep the key columns on the empty fallback.
        daily_universe = (pd.concat(frames) if frames else
                          pd.DataFrame(columns=["tradeday", "ticker"]))

    merged = pd.merge(daily_universe,
                      csi500,
                      on=["tradeday", "ticker"],
                      how="left")
    merged = pd.merge(merged,
                      hs300,
                      on=["tradeday", "ticker"],
                      how="left")
    # Assign the filled columns back: df[col].fillna(..., inplace=True) works on a temporary
    # under pandas Copy-on-Write and leaves the frame unchanged. astype(bool) pins the dtype.
    for flag in ("CSI500", "HS300"):
        merged[flag] = merged[flag].fillna(False).astype(bool)
    merged = merged.sort_values(by=["tradeday", "ticker"])
    return merged.reset_index(drop=True)
Exemple #7
0
def makeStkSWIndustry(start_date: str, end_date: str, daily_universe=None) -> pd.DataFrame:
    '''
    Label every (tradeday, ticker) row of the universe with its Shenwan (SW) level-1 and
    level-2 industry names, for a date range with both endpoints inclusive.

    Classification history comes from ASHARESWINDUSTRIESCLASS joined to ASHAREINDUSTRIESCODE.
    Unlike AShareST (half-open intervals), [ENTRY_DT, REMOVE_DT] here is a closed interval, so
    the same logic as makeStkName is used: a null REMOVE_DT is treated as end_date and both
    ends are inclusive. Records are applied in query order, so when intervals overlap the
    later record wins. Rows not covered by any record keep None.

    :param start_date: str, first date of the range, "YYYYMMDD"
    :param end_date: str, last date of the range, "YYYYMMDD"
    :param daily_universe: optional pd.DataFrame with at least the columns "tradeday" and
            "ticker". When None, it is built by calling makeDailyUniverse for every day
            returned by getTradeCalendar(start_date, end_date). The frame passed in is not
            modified; a new frame is returned.
    :return: pd.DataFrame, the universe columns plus [sw_industry_L1 - str or None,
            sw_industry_L2 - str or None], indexed 0..n-1
    '''

    # Level 1: match on the first 4 characters of the industry code, LEVELNUM = '2'.
    sql1 = \
        '''
        SELECT
            a.S_INFO_WINDCODE,
            b.INDUSTRIESNAME,
            a.ENTRY_DT,
            a.REMOVE_DT 
        FROM
            ASHARESWINDUSTRIESCLASS a,
            ASHAREINDUSTRIESCODE b 
        WHERE
            substr( a.SW_IND_CODE, 1, 4 ) = substr( b.INDUSTRIESCODE, 1, 4 ) 
            AND b.LEVELNUM = '2' 
            AND ENTRY_DT <= {1}
            AND (REMOVE_DT >= {0} OR REMOVE_DT IS NULL)
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        industry_L1 = oracle.query(sql1)
    # Level 2: match on the first 6 characters of the industry code, LEVELNUM = '3'.
    sql2 = \
        '''
        SELECT
            a.S_INFO_WINDCODE,
            b.INDUSTRIESNAME,
            a.ENTRY_DT,
            a.REMOVE_DT 
        FROM
            ASHARESWINDUSTRIESCLASS a,
            ASHAREINDUSTRIESCODE b 
        WHERE
            substr( a.SW_IND_CODE, 1, 6 ) = substr( b.INDUSTRIESCODE, 1, 6 ) 
            AND b.LEVELNUM = '3' 
            AND ENTRY_DT <= {1}
            AND (REMOVE_DT >= {0} OR REMOVE_DT IS NULL)
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        industry_L2 = oracle.query(sql2)
    # An open-ended classification is still valid at the end of the queried range.
    industry_L1['REMOVE_DT'] = np.where(industry_L1['REMOVE_DT'].isna(), end_date, industry_L1['REMOVE_DT'])
    industry_L2['REMOVE_DT'] = np.where(industry_L2['REMOVE_DT'].isna(), end_date, industry_L2['REMOVE_DT'])

    if daily_universe is None:
        # Concatenate once instead of DataFrame.append in a loop (quadratic, and
        # removed in pandas 2).
        frames = [
            makeDailyUniverse(tradeday)
            for tradeday in getTradeCalendar(start_date, end_date)
        ]
        # pd.concat rejects an empty list; keep the key columns on the empty fallback.
        daily_universe = (pd.concat(frames) if frames else
                          pd.DataFrame(columns=["tradeday", "ticker"]))

    # reset_index returns a new frame, so the caller's frame is left untouched, and the
    # fresh 0..n-1 index lets index labels double as positions into `labels` below.
    daily_universe = daily_universe.reset_index(drop=True)
    tradedays_by_ticker = {
        ticker: days
        for ticker, days in daily_universe.groupby('ticker')['tradeday']
    }

    # Both levels use the same closed-interval labelling; only source and target differ.
    for records, column in ((industry_L1, 'sw_industry_L1'),
                            (industry_L2, 'sw_industry_L2')):
        labels = np.full(len(daily_universe), None, dtype=object)
        for record in records.itertuples(index=False):
            # Only look at the rows of this record's ticker instead of rescanning everything.
            days = tradedays_by_ticker.get(record.S_INFO_WINDCODE)
            if days is None:
                continue
            in_range = (days >= record.ENTRY_DT) & (days <= record.REMOVE_DT)
            rows = days.index.to_numpy()[np.asarray(in_range, dtype=bool)]
            labels[rows] = record.INDUSTRIESNAME
        daily_universe[column] = labels
    return daily_universe
Exemple #8
0
def makeStkSuspend(start_date: str,
                   end_date: str,
                   daily_universe=None) -> pd.DataFrame:
    '''
    Flag every (tradeday, ticker) row of the universe with its trading-suspension status,
    for a date range with both endpoints inclusive.

    Suspension records come from ASHARETRADINGSUSPENSION. Each record covers the closed
    interval [S_DQ_SUSPENDDATE, last_suspend_date], where last_suspend_date is the suspension
    date itself when S_DQ_RESUMPDATE is null, and otherwise the value derived from the
    resumption date by get_last_suspend_list (presumably the last suspended day before trading
    resumes - confirm against that helper). Records are applied in query order, so when
    intervals overlap the later record wins. Rows not covered by any record keep
    suspend = False and None in the other three columns.

    :param start_date: str, first date of the range, "YYYYMMDD"
    :param end_date: str, last date of the range, "YYYYMMDD"
    :param daily_universe: optional pd.DataFrame with at least the columns "tradeday" and
            "ticker". When None, it is built by calling makeDailyUniverse for every day
            returned by getTradeCalendar(start_date, end_date). The frame passed in is not
            modified; a new frame is returned.
    :return: pd.DataFrame, the universe columns plus [suspend - bool, suspend_type - value
            returned by parse_suspend_type, suspend_reason_code - object,
            suspend_reason - str], indexed 0..n-1
    '''

    # Two cases: records without a resumption date are matched on the suspension date alone;
    # records with one are matched when [suspend, resume] overlaps the queried range.
    sql = \
        '''
        SELECT
            S_INFO_WINDCODE,
            S_DQ_SUSPENDDATE,
            S_DQ_RESUMPDATE,
            S_DQ_SUSPENDTYPE,
            S_DQ_CHANGEREASONTYPE,
            S_DQ_CHANGEREASON 
        FROM
            ASHARETRADINGSUSPENSION 
        WHERE
                ( S_DQ_RESUMPDATE IS NULL 
                AND S_DQ_SUSPENDDATE >= {0}
                AND S_DQ_SUSPENDDATE <= {1} ) 
            OR 
                (S_DQ_RESUMPDATE IS NOT NULL 
                AND S_DQ_SUSPENDDATE <= {1}
                AND S_DQ_RESUMPDATE >= {0} )
        '''.format(start_date, end_date)
    with OracleSql() as oracle:
        suspensions = oracle.query(sql)
    suspensions['last_suspend_date'] = np.where(
        suspensions['S_DQ_RESUMPDATE'].isna(), suspensions["S_DQ_SUSPENDDATE"],
        get_last_suspend_list(suspensions['S_DQ_RESUMPDATE']))
    suspensions["S_DQ_SUSPENDTYPE"] = suspensions["S_DQ_SUSPENDTYPE"].apply(
        parse_suspend_type)

    if daily_universe is None:
        # Concatenate once instead of DataFrame.append in a loop (quadratic, and
        # removed in pandas 2).
        frames = [
            makeDailyUniverse(tradeday)
            for tradeday in getTradeCalendar(start_date, end_date)
        ]
        # pd.concat rejects an empty list; keep the key columns on the empty fallback.
        daily_universe = (pd.concat(frames) if frames else
                          pd.DataFrame(columns=["tradeday", "ticker"]))

    # reset_index returns a new frame, so the caller's frame is left untouched, and the
    # fresh 0..n-1 index lets index labels double as positions into the arrays below.
    daily_universe = daily_universe.reset_index(drop=True)
    tradedays_by_ticker = {
        ticker: days
        for ticker, days in daily_universe.groupby('ticker')['tradeday']
    }
    row_count = len(daily_universe)
    suspended = np.zeros(row_count, dtype=bool)
    suspend_types = np.full(row_count, None, dtype=object)
    reason_codes = np.full(row_count, None, dtype=object)
    reasons = np.full(row_count, None, dtype=object)

    for record in suspensions.itertuples(index=False):
        # Only look at the rows of this record's ticker instead of rescanning the universe.
        days = tradedays_by_ticker.get(record.S_INFO_WINDCODE)
        if days is None:
            continue
        in_range = (days >= record.S_DQ_SUSPENDDATE) & \
                   (days <= record.last_suspend_date)
        rows = days.index.to_numpy()[np.asarray(in_range, dtype=bool)]
        suspended[rows] = True
        suspend_types[rows] = record.S_DQ_SUSPENDTYPE
        reasons[rows] = record.S_DQ_CHANGEREASON
        reason_codes[rows] = record.S_DQ_CHANGEREASONTYPE

    daily_universe['suspend'] = suspended
    daily_universe['suspend_type'] = suspend_types
    daily_universe['suspend_reason_code'] = reason_codes
    daily_universe['suspend_reason'] = reasons
    return daily_universe