Esempio n. 1
0
def get_momentum(stock_list, date, market_cap_on_current_day):
    """Compute the momentum style-factor exposure for a list of stocks.

    Uses 504 daily returns ending 21 trading days before *date* (i.e. the
    window [-525, -21] of trading days), exponentially weighted with a
    126-day half-life, measured relative to the risk-free rate, then
    winsorized and market-cap-weighted standardized.

    :param stock_list: list of order_book_ids
    :param date: evaluation date (datetime/date)
    :param market_cap_on_current_day: Series of market caps indexed by stock
    :return: Series of processed momentum exposures
    """
    trading_date_525_before = rqdatac.get_trading_dates(date - timedelta(days=1000), date, country='cn')[-525]

    trading_date_21_before = rqdatac.get_trading_dates(date - timedelta(days=40), date, country='cn')[-21]

    # 525 - 21 = 504 trading days of returns are needed.
    exp_weight = get_exponential_weight(half_life=126, length=504)

    # Forward-fill prices for delisted stocks (their daily return becomes 0).
    daily_return = rqdatac.get_price(stock_list, trading_date_525_before, trading_date_21_before, frequency='1d',
                                     fields='close').fillna(method='ffill').pct_change()[1:]

    # Drop stocks whose return series still contains missing values.
    inds = daily_return.isnull().sum()[daily_return.isnull().sum() > 0].index

    # BUG FIX: the original passed a DataFrame (daily_return[inds]) to
    # drop(), which expects labels and raises; drop the column labels.
    daily_return = daily_return.drop(columns=inds)

    # Convert the compounded (annualized) risk-free rate to a daily rate.
    compounded_risk_free_return = rqdatac.get_yield_curve(start_date=trading_date_525_before, end_date=date, tenor='0S')

    risk_free_return = (((1 + compounded_risk_free_return) ** (1 / 365)) - 1).loc[daily_return.index]

    # Exponentially-weighted sum of daily log excess returns.
    relative_strength = np.log(1 + daily_return).T.subtract(np.log(1 + risk_free_return.iloc[:, 0])).dot(exp_weight)

    processed_relative_strength = winsorization_and_market_cap_weighed_standardization(
        relative_strength, market_cap_on_current_day[relative_strength.index])

    return processed_relative_strength
Esempio n. 2
0
def get_multiperiod_factor_returns(all_factors, latest_trading_date,
                                   parameters):
    """Return the current and lagged Barra factor-return series.

    The lagged series are used by the Newey-West adjustment to estimate
    the empirical covariance between current and lagged factor returns.

    :param all_factors: factor ids passed to rqdatac.barra.get_factor_return
    :param latest_trading_date: evaluation date
    :param parameters: dict with 'factor_return_length' and
        'NeweyWest_volatility_lags'
    :return: (current return series, dict of 'lag_i' -> lagged series)
    """
    lags = parameters.get('NeweyWest_volatility_lags')
    length = parameters.get('factor_return_length')

    # Last `lags` trading days up to the evaluation date.
    end_dates = rqdatac.get_trading_dates(
        latest_trading_date - timedelta(days=30),
        latest_trading_date,
        country='cn')[-lags:]

    # The `lags` trading days immediately preceding the return window.
    start_dates = rqdatac.get_trading_dates(
        latest_trading_date - timedelta(days=400),
        latest_trading_date,
        country='cn')[-(length + lags):-length]

    # Expressed in percent, hence the factor of 100.
    daily_factor_return = rqdatac.barra.get_factor_return(
        start_dates[0], end_dates[-1], all_factors) * 100

    multiperiod_daily_factor_return = {
        f'lag_{lag}': daily_factor_return[-(length + lag):-lag]
        for lag in range(1, lags + 1)
    }

    # Current factor-return series plus the N lagged series.
    return daily_factor_return[-length:], multiperiod_daily_factor_return
def get_multiperiod_factor_returns(latest_trading_date, parameters):
    """Fetch current and lagged factor-return series for the full CNE5
    factor set (industry + style + country).

    :param latest_trading_date: evaluation date
    :param parameters: dict with 'factor_return_length' and
        'NeweyWest_volatility_lags'
    :return: dict with keys 'current' and 'lag_1' .. 'lag_N'
    """
    industry_factors = [
        'CNE5S_ENERGY', 'CNE5S_CHEM', 'CNE5S_CONMAT', 'CNE5S_MTLMIN',
        'CNE5S_MATERIAL', 'CNE5S_AERODEF', 'CNE5S_BLDPROD', 'CNE5S_CNSTENG',
        'CNE5S_ELECEQP', 'CNE5S_INDCONG', 'CNE5S_MACH', 'CNE5S_TRDDIST',
        'CNE5S_COMSERV', 'CNE5S_AIRLINE', 'CNE5S_MARINE', 'CNE5S_RDRLTRAN',
        'CNE5S_AUTO', 'CNE5S_HOUSEDUR', 'CNE5S_LEISLUX', 'CNE5S_CONSSERV',
        'CNE5S_MEDIA', 'CNE5S_RETAIL', 'CNE5S_PERSPRD', 'CNE5S_BEV',
        'CNE5S_FOODPROD', 'CNE5S_HEALTH', 'CNE5S_BANKS', 'CNE5S_DVFININS',
        'CNE5S_REALEST', 'CNE5S_SOFTWARE', 'CNE5S_HDWRSEMI', 'CNE5S_UTILITIE'
    ]

    style_factors = [
        'CNE5S_BETA', 'CNE5S_MOMENTUM', 'CNE5S_SIZE', 'CNE5S_EARNYILD',
        'CNE5S_RESVOL', 'CNE5S_GROWTH', 'CNE5S_BTOP', 'CNE5S_LEVERAGE',
        'CNE5S_LIQUIDTY', 'CNE5S_SIZENL'
    ]

    all_factors = industry_factors + style_factors + ['CNE5S_COUNTRY']

    lags = parameters.get('NeweyWest_volatility_lags')
    length = parameters.get('factor_return_length')

    # Lagged returns feed the Newey-West covariance between current and
    # lagged factor returns.
    end_dates = rqdatac.get_trading_dates(
        latest_trading_date - timedelta(days=30),
        latest_trading_date,
        country='cn')[-lags:]

    start_dates = rqdatac.get_trading_dates(
        latest_trading_date - timedelta(days=400),
        latest_trading_date,
        country='cn')[-(length + lags):-length]

    # Expressed in percent, hence the factor of 100.
    daily_factor_return = rqdatac.barra.get_factor_return(
        start_dates[0], end_dates[-1], all_factors) * 100

    factor_returns = {
        f'lag_{lag}': daily_factor_return[-(length + lag):-lag]
        for lag in range(1, lags + 1)
    }

    # Current factor-return series goes alongside the N lagged series.
    factor_returns['current'] = daily_factor_return[-length:]

    return factor_returns
Esempio n. 4
0
def get_multiperiod_factor_returns(latest_trading_date, parameters):
    """Fetch current and lagged factor-return series for the rqdatac factor
    model (Shenwan industry factors + style factors + country factor).

    The Shenwan industry classification changed in 2014, so the industry
    factor list depends on the evaluation date.

    :param latest_trading_date: evaluation date
    :param parameters: dict with 'factor_return_length' and
        'NeweyWest_volatility_lags'
    :return: dict with keys 'current' and 'lag_1' .. 'lag_N'
    """
    if str(latest_trading_date) >= '2014-01-01':
        industry_factors = [
            '农林牧渔', '采掘', '化工', '钢铁', '有色金属', '电子', '家用电器',
            '食品饮料', '纺织服装', '轻工制造', '医药生物', '公用事业', '交通运输',
            '房地产', '商业贸易', '休闲服务', '综合', '建筑材料', '建筑装饰',
            '电气设备', '国防军工', '计算机', '传媒', '通信', '银行', '非银金融',
            '汽车', '机械设备'
        ]
    else:
        industry_factors = [
            '金融服务', '房地产', '医药生物', '有色金属', '餐饮旅游', '综合',
            '建筑建材', '家用电器', '交运设备', '食品饮料', '电子', '信息设备',
            '交通运输', '轻工制造', '公用事业', '机械设备', '纺织服装', '农林牧渔',
            '商业贸易', '化工', '信息服务', '采掘', '黑色金属'
        ]

    style_factors = [
        'beta', 'momentum', 'earnings_yield', 'residual_volatility', 'growth',
        'book_to_price', 'leverage', 'liquidity'
    ]

    all_factors = industry_factors + style_factors + ['comovement']

    lags = parameters.get('NeweyWest_volatility_lags')
    length = parameters.get('factor_return_length')

    # Lagged returns feed the Newey-West covariance between current and
    # lagged factor returns.
    end_dates = rqdatac.get_trading_dates(
        latest_trading_date - timedelta(days=30),
        latest_trading_date,
        country='cn')[-lags:]
    start_dates = rqdatac.get_trading_dates(
        latest_trading_date - timedelta(days=6000),
        latest_trading_date,
        country='cn')[-(length + lags):-length]

    # Expressed in percent, hence the factor of 100.
    daily_factor_return = rqdatac.get_factor_return(
        start_dates[0], end_dates[-1], all_factors) * 100

    factor_returns = {
        f'lag_{lag}': daily_factor_return[-(length + lag):-lag]
        for lag in range(1, lags + 1)
    }

    # Current factor-return series goes alongside the N lagged series.
    factor_returns['current'] = daily_factor_return[-length:]

    return factor_returns
def spread_analysis(underlying_list, start_date, end_date):
    """Compute daily spread data for the dominant contracts of the given
    underlyings over a date range.

    :param underlying_list: list of underlying symbols
    :param start_date: first date of the range
    :param end_date: last date of the range
    :return: DataFrame of per-day spread data concatenated over the range
    """
    date_list = rq.get_trading_dates(start_date=start_date, end_date=end_date)
    spread_data = pd.DataFrame()
    describe_data = pd.DataFrame()

    for date_ind in date_list:
        # Dominant contract of each underlying on this trading day.
        contract_list = []
        for underlying in underlying_list:
            contract = rq.futures.get_dominant(underlying,
                                               start_date=date_ind,
                                               end_date=date_ind,
                                               rule=0)
            contract_list.append(contract[date_ind])

        try:
            daily_data = daily_compute(date_ind, contract_list)
        except AttributeError:
            # BUG FIX: date_ind is a datetime.date; concatenating it with a
            # str (original `date_ind + '计算错误'`) raised TypeError inside
            # the except handler.  Format it instead.
            print(f'{date_ind}计算错误')
        else:
            print(date_ind)
            spread_data = pd.concat([spread_data, daily_data], axis=0)
            # Per-day distribution summary (kept for debugging; not returned).
            describe_data[date_ind] = daily_data['spread_pct'].describe()

    return spread_data
Esempio n. 6
0
    def get_stocks(self):
        """Build the list of rebalancing dates and the stock picks for each.

        Every self.interval trading days, the 100 stocks with the lowest
        positive trailing PE are selected from the filtered universe.

        :return: (list of 'YYYYMMDD' rebalancing dates,
                  dict mapping each date to its stock list)
        """
        trading_days = [d.strftime('%Y%m%d')
                        for d in rqd.get_trading_dates(self.begin_date, self.end_date)]
        adjusted_dates = []
        date_codes_dict = {}
        # Factor data provider.
        fm = FactorModule()
        for idx in range(0, len(trading_days), self.interval):
            date = trading_days[idx]
            print('正在获取:', date)
            adjusted_dates.append(date)
            # Universe excludes stocks suspended today, ST stocks, and stocks
            # listed for fewer than 60 days.
            universe = filter_stock_pool(date)
            # Factor values come from the RiceQuant data source.
            df_factor = fm.get_factor_one_day(universe, 'pe_ratio_ttm', date)
            picks = df_factor[df_factor > 0].nsmallest(100).index.tolist()
            date_codes_dict[date] = picks
        return adjusted_dates, date_codes_dict
def trading_data(underlying_list, start_date, end_date):
    """Collect per-day quote data (last/a1/b1) for the dominant contract of
    each underlying, concatenated across all trading days in the range.

    :param underlying_list: list of underlying symbols
    :param start_date: first date of the range
    :param end_date: last date of the range
    :return: DataFrame with a 'trading_date' column plus prefixed quote
        columns per commodity
    """
    trading_days = rq.get_trading_dates(start_date=start_date, end_date=end_date)
    daily_frames = []

    for day in trading_days:
        print(day)
        # Dominant contract of each underlying on this trading day.
        contracts = []
        for underlying in underlying_list:
            dominant = rq.futures.get_dominant(underlying, start_date=day,
                                               end_date=day, rule=0)
            contracts.append(dominant[day])

        data_load = intercommodityArbitrage.futureData.future_data_load(
            contracts, start_date=day, end_date=day)

        day_df = pd.DataFrame()
        day_df['trading_date'] = data_load[contracts[0]]['trading_date']
        quote_fields = ['last', 'a1', 'b1']
        for contract_id in contracts:
            quotes = data_load[contract_id][quote_fields]
            # Prefix each quote column with the 2-letter commodity code.
            quotes.columns = [contract_id[:2] + '_' + name for name in quote_fields]
            day_df = pd.concat([day_df, quotes], axis=1)

        daily_frames.append(day_df)

    return pd.concat(daily_frames, axis=0) if daily_frames else pd.DataFrame()
Esempio n. 8
0
def future_data_load(args, start_date, end_date):
    """Load and resample futures tick data for each contract in *args*.

    :param args: list of contract ids
    :param start_date: first date of the range
    :param end_date: last date of the range
    :return: dict mapping contract id -> resampled tick DataFrame;
        None when *args* is empty
    """
    if not args:
        return None

    data_dict = {}
    date_list = rq.get_trading_dates(start_date=start_date, end_date=end_date)

    for contract_id in args:
        frames = []
        for date_ind in date_list:
            try:
                daily_data = get_tick(contract_id, date_ind, date_ind)
            except AttributeError:
                # No tick data for this contract/day; skip it.
                print(contract_id)
            else:
                # Keep only the first row of any duplicated timestamp, then
                # resample to the working frequency.
                daily_data = daily_data.loc[~daily_data.index.duplicated(keep='first')]
                frames.append(data_resample(daily_data))

        data_dict[contract_id] = (pd.concat(frames, axis=0)
                                  if frames else pd.DataFrame())

    return data_dict
Esempio n. 9
0
 def crawl(self, begin_date=None, end_date=None):
     """Crawl basic stock-instrument info over a date range and upsert it
     into the 'basic' collection.

     :param begin_date: first date of the range (required)
     :param end_date: last date of the range (required)
     :return: None; no-op when either date is missing
     """
     # BUG FIX: the original guard used `and`, so a single missing date
     # slipped through and None was passed to get_trading_dates.
     if begin_date is None or end_date is None:
         return
     # Trading days in the range.
     dates = rqd.get_trading_dates(begin_date, end_date)
     for date in dates:
         # All listed stocks on this day.
         stocks = rqd.all_instruments(type='CS',
                                      date=date)['order_book_id'].tolist()
         update_requests = []
         for i in rqd.instruments(stocks):
             doc = i.__dict__
             # Instrument info is date-independent, so the upsert key is the
             # code alone.
             update_requests.append(
                 UpdateOne({'code': doc['order_book_id']}, {'$set': doc},
                           upsert=True))
         if len(update_requests) > 0:
             # Unordered bulk upsert into the 'basic' collection.
             update_result = DB_CONN['basic'].bulk_write(update_requests,
                                                         ordered=False)
             print('保存-%s-%s数据 , 插入:%4d , 更新:%4d' %
                   (date, 'basic', update_result.upserted_count,
                    update_result.modified_count),
                   flush=True)
0
def get_previous_trading_days_customized(n):
    """Yield the most recent *n* trading days, newest first.

    Today is included (and counted) when today is itself a trading day.
    """
    current = dt.datetime.now().date()
    if len(rqdatac.get_trading_dates(current, current)) == 1:
        yield current
        n -= 1
    for _ in range(n):
        current = rqdatac.get_previous_trading_date(current)
        yield current
Esempio n. 11
0
def get_cumulative_range(stock_list, date, market_cap_on_current_day):
    """Compute the cumulative-range (CMRA) atomic descriptor.

    Splits the past 252 trading days into 21-day periods, accumulates the
    per-period log excess return, and takes the max-min spread across
    periods, winsorized and market-cap standardized.

    :param stock_list: list of order_book_ids
    :param date: evaluation date
    :param market_cap_on_current_day: Series of market caps indexed by stock
    :return: Series of processed cumulative-range exposures
    """
    trading_date_253_before = rqdatac.get_trading_dates(date -
                                                        timedelta(days=500),
                                                        date,
                                                        country='cn')[-253]

    # Forward-fill prices (e.g. for delistings), then take daily returns.
    daily_return = rqdatac.get_price(
        stock_list,
        trading_date_253_before,
        date,
        frequency='1d',
        fields='close').fillna(method='ffill').pct_change()[1:]

    # Drop stocks whose return series contains missing values.

    inds = daily_return.isnull().sum()[daily_return.isnull().sum() > 0].index

    # NOTE(review): drop() expects labels; passing daily_return[inds] (a
    # DataFrame) looks wrong -- likely intended drop(inds, axis=1). Confirm.
    daily_return = daily_return.drop(daily_return[inds], axis=1)

    # Convert the compounded (annualized) risk-free rate to a daily rate.

    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=trading_date_253_before, end_date=date, tenor='3M')

    risk_free_return = (((1 + compounded_risk_free_return)**(1 / 365)) -
                        1).loc[daily_return.index]

    # Every 21 trading days form one period (12 periods over 252 days).

    spliting_points = np.arange(0, 273, 21)

    cumulative_return = pd.DataFrame()

    for period in range(1, len(spliting_points)):

        # Compounded stock return from the window start to this split point.
        compounded_return = (
            (1 + daily_return.iloc[spliting_points[0]:spliting_points[period]]
             ).cumprod() - 1).iloc[-1]

        # NOTE(review): rebinds compounded_risk_free_return (originally the
        # yield-curve frame) to a per-period Series; confirm reuse is intended.
        compounded_risk_free_return = (
            (1 +
             risk_free_return.iloc[spliting_points[0]:spliting_points[period]]
             ).cumprod() - 1).iloc[-1]

        # Log excess return of each stock over the risk-free rate.
        cumulative_return[period] = np.log(1 + compounded_return).subtract(
            np.log(1 + compounded_risk_free_return.iloc[0]))

    cumulative_return = cumulative_return.cumsum(axis=1)

    # Exposure = spread between max and min cumulative log excess return
    # across the periods, then winsorize/standardize by market cap.
    processed_cumulative_range = winsorization_and_market_cap_weighed_standardization(
        cumulative_return.T.max() - cumulative_return.T.min(),
        market_cap_on_current_day)

    return processed_cumulative_range
Esempio n. 12
0
def get_multiperiod_stock_returns(stock_list, latest_trading_date, parameters):
    """Fetch current and lagged specific-return series for the given stocks.

    The lagged series feed the Newey-West autocorrelation adjustment of the
    specific-risk estimate.

    :param stock_list: list of order_book_ids
    :param latest_trading_date: evaluation date
    :param parameters: dict with 'factor_return_length' and
        'Newey_West_Auto_Correlation_Lags'
    :return: (current specific-return series, dict of 'lag_i' -> series)
    """
    lags = parameters.get('Newey_West_Auto_Correlation_Lags')
    length = parameters.get('factor_return_length')

    # Last `lags` trading days up to the evaluation date.
    end_dates = rqdatac.get_trading_dates(
        latest_trading_date - timedelta(days=30),
        latest_trading_date,
        country='cn')[-lags:]

    # The `lags` trading days immediately preceding the return window.
    start_dates = rqdatac.get_trading_dates(
        latest_trading_date - timedelta(days=400),
        latest_trading_date,
        country='cn')[-(length + lags):-length]

    daily_specific_return = rqdatac.barra.get_specific_return(
        stock_list, start_dates[0], end_dates[-1])

    multiperiod_specific_return = {
        f'lag_{lag}': daily_specific_return[-(length + lag):-lag]
        for lag in range(1, lags + 1)
    }

    # Current specific-return series plus the N lagged series.
    return daily_specific_return[-length:], multiperiod_specific_return
Esempio n. 13
0
def get_stock_beta(stock_list, stock_excess_return, benchmark,
                   latest_trading_date, market_cap_on_current_day):
    """Estimate each stock's beta against the benchmark portfolio.

    Beta is the exponentially-weighted (63-day half-life, 252-day window)
    covariance of the stock's excess return with the benchmark's excess
    return, divided by the weighted benchmark variance.  Missing betas are
    imputed by regression.

    :param stock_list: stocks passed to the imputation step
    :param stock_excess_return: DataFrame of daily stock excess returns
    :param benchmark: order_book_id of the benchmark index
    :param latest_trading_date: evaluation date
    :param market_cap_on_current_day: market caps used by the imputation
    :return: Series of (imputed) betas indexed by stock
    """

    trading_date_253_before = rqdatac.get_trading_dates(latest_trading_date -
                                                        timedelta(days=500),
                                                        latest_trading_date,
                                                        country='cn')[-253]

    # 63-day half-life exponential weights over a 252-day window.
    exp_weight = get_exponential_weight(half_life=63, length=252)

    weighted_stock_excess_return = stock_excess_return.T.multiply(exp_weight).T

    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=trading_date_253_before,
        end_date=latest_trading_date,
        tenor='3M')

    # Convert the compounded (annualized) risk-free rate to a daily rate.
    risk_free_return = (((1 + compounded_risk_free_return)**(1 / 365)) -
                        1).loc[stock_excess_return.index]

    # Benchmark daily returns, forward-filled over gaps.
    market_portfolio_daily_return = rqdatac.get_price(
        benchmark,
        trading_date_253_before,
        latest_trading_date,
        frequency='1d',
        fields='close').fillna(method='ffill').pct_change()[1:]

    market_portfolio_excess_return = market_portfolio_daily_return.subtract(
        risk_free_return.iloc[:, 0])

    weighted_market_portfolio_excess_return = market_portfolio_excess_return.multiply(
        exp_weight).T

    weighted_market_portfolio_variance = weighted_market_portfolio_excess_return.var(
    )

    # beta = weighted cov(stock, market) / weighted var(market).
    beta = [
        weighted_market_portfolio_excess_return.cov(
            weighted_stock_excess_return[stock]) /
        weighted_market_portfolio_variance
        for stock in stock_excess_return.columns
    ]

    stock_beta = pd.Series(beta, index=stock_excess_return.columns)

    # Impute missing betas via regression.
    imputed_stock_beta = individual_factor_imputation(
        stock_list, stock_beta, market_cap_on_current_day,
        latest_trading_date.strftime('%Y-%m-%d'))

    return imputed_stock_beta
Esempio n. 14
0
def volatility_regime_adjustment(factor_covariance, factor_return, date,
                                 parameters):
    """Apply the volatility-regime adjustment (VRA) to a factor covariance.

    Scales the covariance matrix by the squared exponentially-weighted
    cross-sectional bias statistic computed over the last
    4 * VRA_half_life trading days.

    :param factor_covariance: forecast factor covariance matrix (DataFrame)
    :param factor_return: DataFrame of realized daily factor returns
    :param date: evaluation date
    :param parameters: dict with 'VRA_half_life' and
        'NeweyWest_volatility_lags' (NaN selects the daily-eigen cache)
    :return: VRA-adjusted covariance matrix
    """

    # Sample size is four times the weighting half-life.
    start_date = pd.Timestamp(date) - relativedelta(
        months=np.ceil(4 * parameters['VRA_half_life'] / 252 * (12 * 3 / 2)))
    end_date = rqdatac.get_previous_trading_date(date)
    trading_dates = rqdatac.get_trading_dates(
        start_date, end_date)[-4 * parameters['VRA_half_life']:]

    forecast_factor_volatility = pd.DataFrame()

    # NOTE(review): the loop variable shadows the `date` parameter; the
    # parameter is not read afterwards, so behavior is unaffected.
    for date in trading_dates:

        # NOTE(review): hard-coded local cache paths, and each pickle is
        # reloaded on every iteration -- consider loading once outside the
        # loop and making the paths configurable.
        if np.isnan(parameters['NeweyWest_volatility_lags']):

            previous_factor_covariance = pickle.load(
                open('/Users/rice/Desktop/covariance/daliy_eigen.pkl',
                     'rb'))[date]

        elif parameters == shortTermParameters:
            previous_factor_covariance = pickle.load(
                open('/Users/rice/Desktop/covariance/short_eigen.pkl',
                     'rb'))[date]

        else:
            previous_factor_covariance = pickle.load(
                open('/Users/rice/Desktop/covariance/long_eigen.pkl',
                     'rb'))[date]

        # Forecast volatility = sqrt of the covariance diagonal.
        forecast_factor_volatility[date] = pd.Series(
            data=np.diag(previous_factor_covariance),
            index=factor_covariance.index).pow(0.5)

    # De-annualize to daily volatility.
    daily_factor_volatility = forecast_factor_volatility / np.sqrt(252)

    exp_weight = get_exponential_weight(parameters['VRA_half_life'],
                                        4 * parameters['VRA_half_life'])
    # Cross-sectional bias statistic per day, then the exponentially-weighted
    # volatility multiplier.
    cross_sectional_bias = (
        factor_return.loc[trading_dates[0]:trading_dates[-1]] /
        daily_factor_volatility.T).pow(2).mean(axis=1).pow(0.5)
    factor_volatility_multiplier = cross_sectional_bias.dot(exp_weight)
    VRA_adjusted_covariance = factor_covariance * (factor_volatility_multiplier
                                                   **2)

    return VRA_adjusted_covariance
Esempio n. 15
0
def spread_compute(start_date, end_date, contract_list):
    """Compute spreads for each trading day in [start_date, end_date].

    :param start_date: first date of the range
    :param end_date: last date of the range
    :param contract_list: contracts whose spread is computed
    :return: DataFrame of per-day results concatenated over the range
    """
    trading_days = rq.get_trading_dates(start_date=start_date, end_date=end_date)
    results = []

    for day in trading_days:
        day_str = day.strftime('%Y%m%d')
        try:
            daily_data = daily_compute(day_str, contract_list)
        except AttributeError:
            # No usable data for this day.
            print(day_str + '计算错误')
        else:
            results.append(daily_data)

    return pd.concat(results, axis=0) if results else pd.DataFrame()
Esempio n. 16
0
def update_minute1_lib():
    """Append the latest 1-minute bars to the arctic 'minute1' library.

    If the last stored bar of the index (000001.XSHG) ends at 15:00, the
    previous run completed and new data is appended from the next trading
    day.  Otherwise the last run is treated as partial: each symbol is
    rolled back to its previous arctic version before re-appending.
    """
    minute1_lib = arctic['minute1']
    last_index = minute1_lib.read('000001.XSHG').data.index[-1]

    # A 15:00 final bar means the last session was stored completely.
    if last_index.time() == time(15):
        start_date = rq.get_next_trading_date(last_index)

        # No trading days since the last update: nothing to do.
        if not rq.get_trading_dates(start_date, date.today()):
            print('DB is already up to date.')
            return

        for sid in tqdm(all_sid()):
            try:
                df = rq.get_price(sid,
                                  start_date=start_date,
                                  end_date=date.today(),
                                  frequency='1m',
                                  adjust_type='post')
                df.index.name = 'date'
                if len(df) > 0:
                    minute1_lib.append(sid, df, upsert=True)
            except Exception as e:
                # TODO: add logger later
                print(f'{sid}: {str(e)}')
    else:
        # Partial last session: read the previous version's last index to
        # compute the start date, then restore each symbol before appending.
        previous_version = minute1_lib.list_versions('000001.XSHG')[1]
        last_index = minute1_lib.read(
            '000001.XSHG', as_of=previous_version['version']).data.index[-1]
        start_date = rq.get_next_trading_date(last_index)

        for sid in tqdm(all_sid()):
            try:
                df = rq.get_price(sid,
                                  start_date=start_date,
                                  end_date=date.today(),
                                  frequency='1m',
                                  adjust_type='post')
                df.index.name = 'date'
                # Roll the symbol back to its previous version so the
                # partial data is not duplicated by the append below.
                pre_version_number = minute1_lib.list_versions(
                    sid)[1]['version']
                minute1_lib.restore_version(sid, pre_version_number)
                if len(df) > 0:
                    minute1_lib.append(sid, df, upsert=True)
            except Exception as e:
                # TODO: add logger later
                print(f'{sid}: {str(e)}')
Esempio n. 17
0
def market_cap_imputation(stock_list, market_cap_on_current_day,
                          latest_trading_date):
    """Fill in market caps for stocks missing from the input Series.

    First tries close price * total A-shares on the evaluation date; for
    any still-missing values, falls back to the 22-day average of the
    'a_share_market_val' factor, and finally to the cross-sectional mean.

    :param stock_list: full universe of stocks expected in the output
    :param market_cap_on_current_day: Series of known market caps
    :param latest_trading_date: evaluation date
    :return: Series covering stock_list with imputed market caps appended
    """

    missing_market_cap_list = list(
        set(stock_list) - set(market_cap_on_current_day.index.tolist()))

    # Unadjusted close prices on the evaluation date.
    price_on_current_day = rqdatac.get_price(
        missing_market_cap_list,
        start_date=latest_trading_date.strftime('%Y-%m-%d'),
        end_date=latest_trading_date.strftime('%Y-%m-%d'),
        frequency='1d',
        fields='close',
        adjust_type='none').T

    shares_on_current_day = rqdatac.get_shares(
        missing_market_cap_list,
        latest_trading_date.strftime('%Y-%m-%d'),
        latest_trading_date.strftime('%Y-%m-%d'),
        fields='total_a').T

    # Market cap = close * total A-shares.
    market_cap = pd.Series(
        data=(price_on_current_day *
              shares_on_current_day)[latest_trading_date.strftime('%Y-%m-%d')],
        index=missing_market_cap_list)

    if market_cap.isnull().any():

        missing_list = market_cap[market_cap.isnull()].index.tolist()

        trading_date_22_before = rqdatac.get_trading_dates(
            latest_trading_date - timedelta(days=50),
            latest_trading_date,
            country='cn')[-22]

        # 22-day average of the factor; any residual NaN falls back to the
        # mean of the known market caps.
        missing_market_cap = (rqdatac.get_factor(
            id_or_symbols=missing_list,
            factor='a_share_market_val',
            start_date=trading_date_22_before.strftime('%Y-%m-%d'),
            end_date=latest_trading_date.strftime('%Y-%m-%d')).mean()).fillna(
                market_cap_on_current_day.mean())

        market_cap = pd.concat([market_cap, missing_market_cap])

    imputed_market_cap_on_current_day = pd.concat(
        [market_cap_on_current_day, market_cap])

    return imputed_market_cap_on_current_day
Esempio n. 18
0
def get_liquidity(stock_list, date, market_cap_on_current_day):
    """Compute the liquidity style-factor exposure from share turnover.

    Combines 1-month, 3-month and 12-month average turnover sub-factors
    with weights 0.35 / 0.35 / 0.3.

    :param stock_list: list of order_book_ids
    :param date: evaluation date
    :param market_cap_on_current_day: Series of market caps indexed by stock
    :return: Series of processed liquidity exposures
    """
    trading_date_252_before = rqdatac.get_trading_dates(
        date - timedelta(days=500), date, country='cn')[-252]

    active_stocks = drop_suspended_stock(stock_list, date)

    volume = rqdatac.get_price(active_stocks,
                               trading_date_252_before,
                               date,
                               frequency='1d',
                               fields='volume')
    shares = rqdatac.get_shares(active_stocks,
                                trading_date_252_before,
                                date,
                                fields='total_a')

    daily_turnover_rate = volume.divide(shares)

    def turnover_exposure(window, months):
        # Zero turnover over the window yields NaN exposure for this
        # sub-factor (replaced by 0 when the sub-factors are combined).
        summed = daily_turnover_rate.iloc[-window:].sum().replace(0, np.nan)
        return winsorization_and_market_cap_weighed_standardization(
            np.log(summed / months), market_cap_on_current_day)

    one_month_share_turnover = turnover_exposure(21, 1)
    three_months_share_turnover = turnover_exposure(63, 3)
    twelve_months_share_turnover = turnover_exposure(252, 12)

    liquidity = (0.35 * one_month_share_turnover.replace(np.nan, 0)
                 + 0.35 * three_months_share_turnover.replace(np.nan, 0)
                 + 0.3 * twelve_months_share_turnover.replace(np.nan, 0))

    return winsorization_and_market_cap_weighed_standardization(
        liquidity, market_cap_on_current_day)
Esempio n. 19
0
def get_trading_dates_all_option(end_date, start_date=None) -> list:
    """Return the trading dates from *start_date* through *end_date*.

    When *start_date* is omitted, the earliest listing date across all
    option instruments is used instead.

    :param start_date: optional explicit start date
    :param end_date: datetime, the end date
    :return: list of trading dates (empty when end_date precedes the start)
    """
    if start_date is not None:
        earliest_list_date = start_date
    else:
        # Earliest listing date across every option instrument.
        listed = rqdatac.all_instruments(type='Option')['listed_date']
        earliest_list_date = min(
            dt.datetime.strptime(value, '%Y-%m-%d')
            for value in listed.tolist())

    if end_date < earliest_list_date:
        return []

    return rqdatac.get_trading_dates(earliest_list_date, end_date)
Esempio n. 20
0
def update_day_lib():
    """Append the latest daily bars to the arctic 'day' library.

    Resumes from the trading day after the last stored bar of the index
    (000001.XSHG); does nothing when the library is already current.
    """
    day_lib = arctic['day']
    last_index = day_lib.read('000001.XSHG').data.index[-1]
    start_date = rq.get_next_trading_date(last_index)

    # No trading days since the last update: nothing to do.
    if not rq.get_trading_dates(start_date, date.today()):
        print('DB is already up to date.')
        return

    for sid in tqdm(all_sid()):
        try:
            bars = rq.get_price(sid,
                                start_date=start_date,
                                end_date=date.today(),
                                frequency='1d',
                                adjust_type='post')
            bars.index.name = 'date'
            if len(bars):
                day_lib.append(sid, bars, upsert=True)
        except Exception as e:
            # TODO: add logger later
            print(f'{sid}: {str(e)}')
def future_data_load(data_dir, *args, start_date, end_date):
    """Download futures tick data and dump one CSV per contract per day.

    :param data_dir: output directory/prefix for the CSV files
    :param args: contract ids
    :param start_date: first date of the range
    :param end_date: last date of the range
    :return: None
    """
    if not args:
        return

    trading_days = rq.get_trading_dates(start_date=start_date,
                                        end_date=end_date)
    for contract_id in args:
        for day in trading_days:
            day_str = day.strftime('%Y%m%d')
            try:
                ticks = get_tick(contract_id, day_str, day_str)
            except AttributeError:
                # No tick data for this contract/day; skip it.
                print(contract_id)
            else:
                ticks.to_csv(data_dir + contract_id + '_' + day_str + '.csv')
Esempio n. 22
0
 def compute(self, begin_date, end_date):
     """Compute factors over [begin_date, end_date] and upsert them.

     :param begin_date: first trading date of the range
     :param end_date: last trading date of the range
     :return: None
     """
     trading_days = [d.strftime('%Y%m%d')
                     for d in rqd.get_trading_dates(begin_date, end_date)]
     for date in trading_days:
         # The subclass implements the per-day factor computation.
         factors = self.compute_one_day(date)
         requests = [
             UpdateOne({'code': factor['code'], 'date': date},
                       {'$set': factor},
                       upsert=True)
             for factor in factors
         ]
         if requests:
             result = self.collection.bulk_write(requests)
             print('保存-%s-%s数据 , 插入:%4d , 更新:%4d'
                   % (date, self.name, result.upserted_count, result.modified_count),
                   flush=True)
def statistics(underlying_list, start_date, end_date, time_yes, time_list):
    """For each intraday time in *time_list*, tabulate how often the spread
    observed at *time_yes* on the previous trading day reverses sign by
    that time.

    :param underlying_list: underlyings whose dominant-contract spread is used
    :param start_date: first date of the range
    :param end_date: last date of the range
    :param time_yes: reference time ('HH:MM:SS') on the previous trading day
    :param time_list: intraday times ('HH:MM:SS') to evaluate
    :return: dict mapping each time to a DataFrame indexed by threshold
        order (-20..20 excluding 0) with columns count_all, count_inverse,
        ratio_inverse, return_avg
    """
    # Load the spread series and downsample to 1-minute bars.
    spread_data = intercommodityArbitrage.spreadAnalysis.spread_analysis(
        underlying_list, start_date, end_date)
    # BUG FIX: resample(..., how='first') was removed from pandas; use the
    # method form instead.
    spread_data = spread_data.resample('1min', closed='left',
                                       label='left').first()

    date_list = rq.get_trading_dates(start_date, end_date)
    index_str = spread_data.index.strftime("%Y-%m-%d %H:%M:%S.%f")

    statistical_dict = {}

    for time_on in time_list:
        print(time_on)
        data_filter = pd.DataFrame(index=date_list[1:],
                                   columns=['spread_pre', 'spread_on'])
        statistical_details = pd.DataFrame(index=range(-20, 21),
                                           columns=[
                                               'count_all', 'count_inverse',
                                               'ratio_inverse', 'return_avg'
                                           ])

        # Pair each day's spread at time_on with the previous day's spread
        # at time_yes.
        for date in date_list[1:]:
            time_pre = date_list[date_list.index(date) - 1].strftime(
                "%Y-%m-%d") + " " + time_yes + ".000000"
            time_str = date.strftime("%Y-%m-%d") + " " + time_on + ".000000"
            data_filter.loc[date, 'spread_pre'] = spread_data["spread_pct"][
                index_str == time_pre].values[0]
            data_filter.loc[date, 'spread_on'] = spread_data["spread_pct"][
                index_str == time_str].values[0]

        # One row per 0.1% threshold step.  Negative orders test downside
        # thresholds, positive orders upside; the two branches were
        # previously duplicated and are now unified via masks.
        for order in range(-20, 21):
            if order == 0:
                continue
            threshold = 0.001 * order
            if order < 0:
                pre_mask = data_filter['spread_pre'] < threshold
                inverse_mask = data_filter['spread_on'] > 0
            else:
                pre_mask = data_filter['spread_pre'] > threshold
                inverse_mask = data_filter['spread_on'] < 0

            statistical_details.loc[order, 'count_all'] = pre_mask.sum()
            statistical_details.loc[order, 'count_inverse'] = (
                inverse_mask & pre_mask).sum()
            if pre_mask.sum():
                statistical_details.loc[order, 'ratio_inverse'] = (
                    statistical_details.loc[order, 'count_inverse'] /
                    statistical_details.loc[order, 'count_all'])
                statistical_details.loc[order, 'return_avg'] = (
                    data_filter['spread_on'][pre_mask].mean())

        # Row 0 is never populated; drop it.
        statistical_details.drop(index=[0], inplace=True)
        statistical_dict[time_on] = statistical_details

    return statistical_dict
Esempio n. 24
0
def get_momentum_and_res_vol(date):
    """Compute momentum and residual-volatility style-factor exposures.

    Parameters
    ----------
    date : str
        Calculation date in ``"%Y-%m-%d"`` format.

    Returns
    -------
    (pd.DataFrame, pd.DataFrame)
        Imputed atomic-descriptor exposures and imputed style-factor
        exposures for all stocks listed on the latest trading date.
    """

    latest_trading_date = rqdatac.get_previous_trading_date(
        datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1))

    # 252nd-to-last trading day inside a 500-calendar-day lookback window.
    trading_date_252_before = rqdatac.get_trading_dates(
        latest_trading_date - timedelta(days=500),
        latest_trading_date, country='cn')[-252]

    stock_list = rqdatac.all_instruments(
        type='CS', date=latest_trading_date)['order_book_id'].values.tolist()

    # Market and financial data shared by the factor computations below.
    (recent_report_type, annual_report_type, market_cap_on_current_day,
     stock_excess_return, market_portfolio_excess_return,
     recent_five_annual_shares, last_reported_non_current_liabilities,
     last_reported_preferred_stock) = get_financial_and_market_data(
        stock_list, latest_trading_date, trading_date_252_before)

    # Momentum and residual-volatility exposures.
    momentum = get_momentum(stock_list, latest_trading_date,
                            market_cap_on_current_day)

    market_portfolio_beta, market_portfolio_beta_exposure = get_market_portfolio_beta(
        stock_excess_return, market_portfolio_excess_return,
        market_cap_on_current_day)

    (daily_standard_deviation, cumulative_range, historical_sigma,
     residual_volatility) = get_residual_volatility(
        stock_list, latest_trading_date, stock_excess_return,
        market_portfolio_excess_return, market_cap_on_current_day,
        market_portfolio_beta_exposure, market_portfolio_beta)

    style_factors_exposure = pd.concat([momentum, residual_volatility], axis=1)
    style_factors_exposure.columns = ['momentum', 'residual_volatility']

    atomic_descriptors_exposure = pd.concat(
        [daily_standard_deviation, cumulative_range, historical_sigma], axis=1)
    atomic_descriptors_exposure.columns = [
        'daily_standard_deviation', 'cumulative_range', 'historical_sigma']

    # The financial data covers the whole market, so the frames may contain
    # stocks not yet listed on the calculation date -- subset to stock_list.
    atomic_descriptors_exposure = atomic_descriptors_exposure.loc[stock_list]
    style_factors_exposure = style_factors_exposure.loc[stock_list]

    date_str = latest_trading_date.strftime('%Y-%m-%d')

    # Fill missing atomic-descriptor values with the regression approach.
    imputed_atomic_descriptors = pd.DataFrame()
    for descriptor in atomic_descriptors_exposure.columns:
        imputed_atomic_descriptors[descriptor] = individual_factor_imputation(
            stock_list, atomic_descriptors_exposure[descriptor],
            market_cap_on_current_day, date_str)

    # Fill missing style-factor exposures with the regression approach.
    imputed_style_factors_exposure = style_factors_imputation(
        style_factors_exposure, market_cap_on_current_day, date_str)

    # If any missing values survive imputation, fall back to a regression
    # over the whole market.
    if imputed_style_factors_exposure.isnull().sum().sum() > 0:
        imputed_style_factors_exposure = factor_imputation(
            market_cap_on_current_day, imputed_style_factors_exposure)

    if imputed_atomic_descriptors.isnull().sum().sum() > 0:
        imputed_atomic_descriptors = factor_imputation(
            market_cap_on_current_day, imputed_atomic_descriptors)

    return imputed_atomic_descriptors, imputed_style_factors_exposure
Esempio n. 25
0
def data_process(order_book_ids,
                 asset_type,
                 start_date,
                 windows,
                 data_freq,
                 out_threshold_coefficient=None):
    """
    Clean data for covariance matrix calculation
    :param order_book_ids: str list. A selected list of assets.
    :param asset_type: str. "fund" or "stock"
    :param start_date: str. The first day for backtest.
    :param windows: int. Interval length for sample.
    :param out_threshold_coefficient: float, optional. Determine the threshold to filter out assets with too short data
    which may cause problem in covariance matrix calculation. Whose data length is shorter than threshold will
    be eliminated. Default: 0.5(out_threshold = 0.5*windows).
    :param data_freq: str. Support input: "D": daily data; "W": weekly data; "M": monthly data.
    Weekly data means the close price at the end of each week is taken; monthly means the close price at the end of each
    month. When weekly and monthly data are used, suspended days issues will not be considered. In addition, weekly and
    monthly data don't consider public holidays which have no trading. Users should use a windows a little bit larger
    to get desired data length.
    Users should be very careful when using weekly or monthly data to avoid the observations have too short length.
    :return:
    pandas DataFrame. Contain the prices after cleaning (columns keep the
    original input id order);
    pandas DataFrame. The order_book_ids filtered out and the reasons of elimination;
    pandas Timestamp. A new start date for covariance calculation which may differ from default windows setting.
    """

    end_date = pd.to_datetime(rqdatac.get_previous_trading_date(start_date))
    # Choose the start date based on the windows inputted, can't work if backtest start date is earlier than
    # "1995-01-01". The windows for weekly and monthly data don't consider any public holidays which have no trading.
    windows_dict = {
        "D": -(windows + 1),
        "W": -(windows + 1) * 5,
        "M": -(windows + 1) * 22
    }
    start_date = rqdatac.get_trading_dates("2005-01-01",
                                           end_date)[windows_dict[data_freq]]
    reset_start_date = pd.to_datetime(start_date)

    # NOTE: string comparisons below use "==" -- the original "is" identity
    # tests only worked by accident of CPython string interning.
    period_volume = None
    if asset_type == 'fund':
        period_prices = rqdatac.fund.get_nav(order_book_ids,
                                             reset_start_date,
                                             end_date,
                                             fields='adjusted_net_value')
    elif asset_type == 'stock':
        period_data = rqdatac.get_price(order_book_ids,
                                        reset_start_date,
                                        end_date,
                                        frequency='1d',
                                        fields=['close', 'volume'])

        period_prices = period_data['close']
        period_volume = period_data['volume']

    # Resample to week/month ends, forward-filling the last known price.
    if data_freq != "D":
        period_prices = period_prices.asfreq(data_freq, method="pad")

    # Set up the threshold of elimination
    if out_threshold_coefficient is None:
        out_threshold = ceil(windows * 0.5)
    else:
        out_threshold = ceil(windows * out_threshold_coefficient)

    # Collect (order_book_id, reason) pairs instead of repeatedly calling
    # DataFrame.append, which is O(n^2) and was removed in pandas 2.0.
    kickout_records = []

    # Check whether any stocks has long suspended trading periods, have been delisted or new-listed for less than 132
    # trading days and generate list for such stocks. For weekly and monthly data, only those assets which have too late
    # beginning date, were delisted or new-listed will be eliminated.
    if asset_type == "stock":
        if data_freq == "D":
            for i in order_book_ids:
                period_volume_i = period_volume.loc[:, i]
                volume_counts = period_volume_i.value_counts()
                instrument_i = rqdatac.instruments(i)
                de_listed_date = instrument_i.de_listed_date
                listed_date = pd.to_datetime(instrument_i.listed_date)
                if volume_counts.empty:
                    kickout_records.append((i, "无相关股票数据"))
                # New-listed stock test
                elif (end_date - listed_date).days <= 132:
                    kickout_records.append((i, "上市时间少于132个交易日"))
                # Delisted test
                elif de_listed_date != "0000-00-00":
                    if pd.to_datetime(de_listed_date) < end_date:
                        kickout_records.append((i, "已退市"))
                # Long suspended test
                elif 0 in volume_counts.index.values:
                    if volume_counts[volume_counts.index == 0].iloc[0] >= out_threshold:
                        kickout_records.append((i, "停牌交易日数量过多"))
                # Late beginning day test and just-in-case test for missing values
                elif period_volume_i.isnull().sum() >= out_threshold:
                    kickout_records.append((i, "缺失值过多"))
        else:
            for i in order_book_ids:
                period_prices_i = period_prices.loc[:, i]
                instrument_i = rqdatac.instruments(i)
                de_listed_date = instrument_i.de_listed_date
                listed_date = pd.to_datetime(instrument_i.listed_date)
                # All-NaN column means no data at all for this asset.
                if period_prices_i.notnull().sum() == 0:
                    kickout_records.append((i, "无相关股票数据"))
                # New-listed test
                elif (end_date - listed_date).days <= 132:
                    kickout_records.append((i, "股票上市时间少于132个交易日"))
                # Delisted test
                elif de_listed_date != "0000-00-00":
                    if pd.to_datetime(de_listed_date) < end_date:
                        kickout_records.append((i, "股票已退市"))
                # Late beginning day test and just-in-case test for missing values
                elif period_prices_i.isnull().sum() >= out_threshold:
                    kickout_records.append((i, "缺失值过多"))
    elif asset_type == "fund":
        for i in order_book_ids:
            period_prices_i = period_prices.loc[:, i]
            instrument_i = rqdatac.fund.instruments(i)
            de_listed_date = instrument_i.de_listed_date
            listed_date = pd.to_datetime(instrument_i.listed_date)
            if period_prices_i.notnull().sum() == 0:
                kickout_records.append((i, "无相关基金数据"))
            # New-listed test
            elif (end_date - listed_date).days <= 132:
                kickout_records.append((i, "基金发行时间少于132个交易日"))
            # Delisted test
            elif de_listed_date != "0000-00-00":
                if pd.to_datetime(de_listed_date) < end_date:
                    kickout_records.append((i, "基金已清算"))
            elif period_prices_i.isnull().sum() >= out_threshold:
                kickout_records.append((i, "缺失值过多"))

    if kickout_records:
        ids, reasons = zip(*kickout_records)
        kickout_assets = pd.DataFrame({"剔除原因": list(reasons)},
                                      index=list(ids))
    else:
        kickout_assets = pd.DataFrame(columns=["剔除原因"])

    period_prices = period_prices.fillna(method="pad")
    # Generate final kickout list which includes all the above
    final_kickout_set = set(kickout_assets.index)
    # Generate clean data; a list comprehension (not set arithmetic) actually
    # keeps the original input id order as documented.
    clean_order_book_ids = [oid for oid in order_book_ids
                            if oid not in final_kickout_set]

    clean_period_prices = period_prices.loc[reset_start_date:end_date,
                                            clean_order_book_ids]
    return clean_period_prices, kickout_assets, reset_start_date
def strategy(underlying_list, start_date, end_date, quantile=0.03, close=0.003, stop=-0.003, close_len=7200):
    # NOTE(review): this function looks like a corrupted paste of at least
    # two unrelated scripts (see notes below).  The parameters quantile,
    # close, stop and close_len are never used, and nothing is returned.
    # Data loading
    future_data = trading_data(underlying_list, start_date=start_date, end_date=end_date)
    spread_data = intercommodityArbitrage.spreadAnalysis.spread_analysis(underlying_list, start_date, end_date)

    # Tick-by-tick backtest collecting trade signals.
    # NOTE(review): the code below also writes a 'tradeDate' column that is
    # missing from this column list.
    trade_details = pd.DataFrame(columns=['openTime', 'closeTime', 'tradeDirection',
                                          'openSpread', 'closeSpread', 'profitSpread', 'profitTrade'])
    count_num = -1

    date_list = rq.get_trading_dates(start_date, end_date)
    for date in date_list:
        # date = date_list[0]
        print(date)
        hold_par = False  # whether a position is currently open
        pos_par = 0  # position direction: 1 long, -1 short, 0 flat
        open_spread = 0
        open_order = 0

        daily_spread = spread_data[future_data['trading_date'] == date]

        # NOTE(review): stopLossPrice is not defined anywhere visible, and
        # the self-assignments below (e.g. `bBreak = bBreak`) make each name
        # a local that is read before being bound -- this raises
        # UnboundLocalError at runtime.  These R-Breaker-style levels were
        # presumably meant to be computed or passed in; confirm against the
        # original source.
        STOP_LOSS_PRICE = stopLossPrice
        bBreak = bBreak
        sSetup = sSetup
        sEnter = sEnter
        bEnter = bEnter
        bSetup = bSetup
        sBreak = sBreak

        for order in range(1, daily_spread.shape[0] - 1):
            # order = 0
            last_spread = daily_spread.iloc[order, 4]

            if not hold_par:
                # Breakout entries: long above bBreak, short below sBreak.
                if last_spread > bBreak:
                    open_spread = last_spread
                    open_order = order
                    pos_par = 1
                    hold_par = True
                    count_num += 1
                    trade_details.loc[count_num, 'tradeDate'] = date
                    trade_details.loc[count_num, 'openTime'] = daily_spread.index[order]
                    trade_details.loc[count_num, 'tradeDirection'] = pos_par
                    trade_details.loc[count_num, 'openSpread'] = open_spread
                elif last_spread < sBreak:
                    open_spread = last_spread
                    open_order = order
                    pos_par = -1
                    hold_par = True
                    count_num += 1
                    trade_details.loc[count_num, 'tradeDate'] = date
                    trade_details.loc[count_num, 'openTime'] = daily_spread.index[order]
                    trade_details.loc[count_num, 'tradeDirection'] = pos_par
                    trade_details.loc[count_num, 'openSpread'] = open_spread
            else:
                # Stop-loss exits; the short leg records the negated P&L.
                profit_spread = last_spread - open_spread
                if (profit_spread <= STOP_LOSS_PRICE) and (pos_par == 1):
                    trade_details.loc[count_num, 'closeTime'] = daily_spread.index[order]
                    trade_details.loc[count_num, 'closeSpread'] = last_spread
                    trade_details.loc[count_num, 'profitSpread'] = profit_spread
                    pos_par = 0
                    hold_par = False
                if (profit_spread >= -STOP_LOSS_PRICE) and (pos_par == -1):
                    trade_details.loc[count_num, 'closeTime'] = daily_spread.index[order]
                    trade_details.loc[count_num, 'closeSpread'] = last_spread
                    trade_details.loc[count_num, 'profitSpread'] = -profit_spread
                    pos_par = 0
                    hold_par = False
    # NOTE(review): everything from here to the end of the function appears
    # pasted in from a different (calendar-arbitrage) script; it shares no
    # state with the backtest above.
    # Parameters
    dateLen = 10
    startBenchDate = '0101'
    endBenchDate = '0331'
    contractList = ('000016.XSHG', '000300.XSHG', '000905.XSHG', '000852.XSHG',
                    '399006.XSHE')

    # Computation
    pctChange = pd.DataFrame(index=range(2011, 2021), columns=contractList)

    for year_id in range(2011, 2021):
        # year_id = 2011
        print(year_id)
        endDate = str(year_id) + endBenchDate
        startDate = str(year_id) + startBenchDate
        dateSeries = pd.Series(rq.get_trading_dates(startDate, endDate))
        dateDelta = [0] + [
            delta.days for delta in dateSeries.diff()[1:].tolist()
        ]
        dateDf = pd.DataFrame({'date': dateSeries, 'delta': dateDelta})
        dateDf = dateDf[dateDf['delta'] > 3]
        startDate = dateDf.iloc[0, 0]
        endDate = rq.get_next_trading_date(startDate, n=dateLen)
        print(startDate)

        Data = calendarArbitrage.dataLoad.data_load(contractList,
                                                    start_date=startDate,
                                                    end_date=endDate)

        dataDf = pd.DataFrame()
        # NOTE(review): the loop below has no indented body -- the next line
        # sits at the same indentation level, so this file fails to parse
        # (IndentationError).  contractCodeList, indexCode and
        # underlyingInstruments are also undefined here.
        for contract_id in contractList:
        orderList = contractCodeList.copy()
        orderList.append(indexCode)

        price = rq.get_price(orderList,
                             start_date=startDate,
                             end_date=endDate,
                             frequency='1d',
                             fields='close',
                             adjust_type='none',
                             skip_suspended=False,
                             market='cn',
                             expect_df=False)
        price = price[orderList]

        dateList = rq.get_trading_dates(start_date=startDate, end_date=endDate)
        yearlyData = pd.DataFrame(index=dateList, columns=contractCodeList)

        for codeIndex in contractCodeList:
            deListedDate = underlyingInstruments['de_listed_date'][
                underlyingInstruments['trading_code'] == codeIndex].values[0]
            deDateLen = [
                (datetime.datetime.strptime(deListedDate, '%Y-%m-%d').date() -
                 ind).days for ind in dateList
            ]
            yearlyData[codeIndex] = pd.Series(
                [ind / 365 + 0.0001 for ind in deDateLen], index=dateList)

        basisData = price.iloc[:, 0:4].sub(price.iloc[:, 4], axis=0)
        discountAbsolute = price.iloc[:, 0:4].div(price.iloc[:, 4], axis=0) - 1
        discountYearly = discountAbsolute.div(yearlyData, axis=0)
        # NOTE(review): orphan fragment -- the beginning of the enclosing
        # function (its `def` line and the opening of the call whose
        # arguments continue below) is missing from the file; this span
        # cannot parse as-is and needs to be restored from the original
        # source before any edits.
        date, rq_style_barra_industry, industry_factors)

    barra_factor_returns = factor_return_estimation(date, barra_exposure,
                                                    industry_factors)

    barra_style_rq_industry_factor_returns = factor_return_estimation(
        date, barra_style_rq_industry_exposure, shenwan_industry_name)

    return rq_factor_returns, rq_style_barra_industry_factor_returns, barra_factor_returns, barra_style_rq_industry_factor_returns


# Backtest window for comparing the factor-return series.
start_date = '2017-01-04'

end_date = '2017-12-31'

test_trading_dates = rqdatac.get_trading_dates(start_date, end_date)

# Barra CNE5 style-factor identifiers.
barra_style_factors = [
    'CNE5S_BETA', 'CNE5S_MOMENTUM', 'CNE5S_SIZE', 'CNE5S_EARNYILD',
    'CNE5S_RESVOL', 'CNE5S_GROWTH', 'CNE5S_BTOP', 'CNE5S_LEVERAGE',
    'CNE5S_LIQUIDTY', 'CNE5S_SIZENL'
]

# One empty (dates x factors) frame per factor-return variant.
rqreturns, rsbireturns, breturns, bsrireturns = (
    pd.DataFrame(index=test_trading_dates, columns=barra_style_factors)
    for _ in range(4)
)
def strategy(underlying_list,
             start_date,
             end_date,
             diff=0.0015,
             stop=-0.001,
             close=0.001,
             open_len=1200,
             close_len=7200):
    """Tick-level spread backtest: open on a rolling-window breakout, close
    on take-profit, stop-loss, or a maximum holding period.

    Parameters
    ----------
    underlying_list : list
        The two contract codes traded against each other.
    start_date, end_date : str
        Backtest period (trading dates inclusive).
    diff : float
        Minimum spread move versus the rolling-window extremes that
        triggers an entry.
    stop : float
        Stop-loss threshold on the signed spread P&L (negative number).
    close : float
        Take-profit threshold on the signed spread P&L.
    open_len : int
        Rolling window length, in ticks, used for the entry extremes.
    close_len : int
        Maximum holding period in ticks before a forced close.

    Returns
    -------
    pd.DataFrame
        One row per trade: open/close time, direction, open/close spread,
        spread P&L and per-trade return (bid/ask aware).
    """
    # Data loading
    future_data = trading_data(underlying_list,
                               start_date=start_date,
                               end_date=end_date)
    spread_data = intercommodityArbitrage.spreadAnalysis.spread_analysis(
        underlying_list, start_date, end_date)

    trade_details = pd.DataFrame(columns=[
        'tradeDate', 'openTime', 'closeTime', 'tradeDirection', 'openSpread',
        'closeSpread', 'profitSpread', 'profitTrade'
    ])
    count_num = -1

    def _record_open(row, trade_date, open_time, direction, spread):
        # Single helper for the previously triplicated open bookkeeping.
        trade_details.loc[row, 'tradeDate'] = trade_date
        trade_details.loc[row, 'openTime'] = open_time
        trade_details.loc[row, 'tradeDirection'] = direction
        trade_details.loc[row, 'openSpread'] = spread

    def _record_close(row, close_time, close_spread, signed_profit):
        # Single helper for the previously six-fold duplicated close
        # bookkeeping; signed_profit is positive when the trade made money.
        trade_details.loc[row, 'closeTime'] = close_time
        trade_details.loc[row, 'closeSpread'] = close_spread
        trade_details.loc[row, 'profitSpread'] = signed_profit

    # Tick-by-tick backtest: generate entry/exit signals.
    date_list = rq.get_trading_dates(start_date, end_date)
    for date in date_list:
        print(date)
        hold_par = False  # whether a position is currently open
        pos_par = 0       # direction: 1 long spread, -1 short spread
        open_spread = 0
        open_order = 0

        daily_spread = spread_data[future_data['trading_date'] == date]

        for order in range(1, daily_spread.shape[0] - 1):
            # Rolling window of the last open_len spread observations.
            data_series = daily_spread.iloc[max(order - open_len, 0):order, 4]

            last_spread = data_series.iloc[-1]
            window_max = np.max(data_series)
            window_min = np.min(data_series)

            if not hold_par:
                risen = last_spread - window_min >= diff
                room_above = window_max - last_spread >= diff
                if risen and room_above:
                    # Breakout both ways: fade the net move over the window.
                    pos_par = -1 if last_spread - data_series.iloc[0] >= 0 else 1
                elif risen:
                    pos_par = -1   # near the window top -> short the spread
                elif room_above:
                    pos_par = 1    # near the window bottom -> long the spread
                else:
                    continue       # no signal on this tick
                open_spread = last_spread
                open_order = order
                hold_par = True
                count_num += 1
                _record_open(count_num, date, data_series.index[-1],
                             pos_par, open_spread)
            else:
                # Signed P&L works for both directions: long profits when the
                # spread rises, short when it falls.
                signed_profit = pos_par * (last_spread - open_spread)
                if (signed_profit >= close or signed_profit <= stop
                        or order - open_order > close_len):
                    _record_close(count_num, data_series.index[-1],
                                  last_spread, signed_profit)
                    pos_par = 0
                    hold_par = False

        # Force-close any position still open at the end of the day.
        if pos_par != 0:
            last_spread = daily_spread.iloc[-1, 4]
            _record_close(count_num, daily_spread.index[-1], last_spread,
                          pos_par * (last_spread - open_spread))

    def _trade_return(open_time, close_time, long_c, short_c):
        # Per-trade return crossing the book: buy the long leg at the ask and
        # sell it at the bid; mirror image for the short leg.
        open_ask = future_data.loc[open_time, long_c + '_a1']
        long_leg = (future_data.loc[close_time, long_c + '_b1'] -
                    open_ask) / open_ask
        open_bid = future_data.loc[open_time, short_c + '_b1']
        short_leg = -(future_data.loc[close_time, short_c + '_a1'] -
                      open_bid) / open_bid
        return (long_leg + short_leg) / 2

    # P&L computation per recorded trade.
    contract_0, contract_1 = underlying_list[0], underlying_list[1]
    for row in trade_details.index:
        open_time = trade_details.loc[row, 'openTime']
        close_time = trade_details.loc[row, 'closeTime']
        if trade_details.loc[row, 'tradeDirection'] == 1:
            # Long the spread: long contract_0, short contract_1.
            trade_details.loc[row, 'profitTrade'] = _trade_return(
                open_time, close_time, contract_0, contract_1)
        else:
            # Short the spread: long contract_1, short contract_0.
            trade_details.loc[row, 'profitTrade'] = _trade_return(
                open_time, close_time, contract_1, contract_0)

    return trade_details