Esempio n. 1
0
def update_option(message=None):
    #Replace all contract infomation
    SQL = create_engine('postgresql://*****:*****@#[email protected]:5432/option')
    all_instruments = rq.all_instruments(type="Option")
    all_instruments.to_sql("contract", SQL, if_exists='replace')

    for id, group in all_instruments.groupby(["underlying_order_book_id"]):
        #Update option contracts
        contracts = options.get_contracts(id)
        for contract in contracts:
            table = "daily_%s" % contract
            if is_table_exist(table, SQL):
                order_book_id_df = all_instruments[all_instruments["order_book_id"] == contract]
                maturity_date_string = order_book_id_df["maturity_date"].values[0]
                maturity_date_datetime = datetime.datetime(year=int(maturity_date_string[0:4]),
                                                           month=int(maturity_date_string[5:7]),
                                                           day=int(maturity_date_string[8:10]))
                if maturity_date_datetime > TODAY:
                    if get_started_date(table, SQL):
                        start_date_string = get_started_date(table, SQL)
                        df = rq.get_price(contract, start_date=start_date_string, end_date=TODAY_STR, frequency='1d')
                        write(df, table, SQL, if_exists='append')
                        print(u"已更新期权:%s 日线行情,开始时间: %s, 结束时间: %s" % (contract, start_date_string, TODAY_STR))
            else:
                df = rq.get_price(contract, start_date=START_DATE_STR, end_date=TODAY_STR, frequency='1d')
                write(df, table, SQL, if_exists='replace')
                print(u"已更新期权:%s 日线行情,开始时间: %s, 结束时间: %s" % (contract, START_DATE_STR, TODAY_STR))
Esempio n. 2
0
    def kline(self,
              codes,
              period='1m',
              start=None,
              end=None,
              pre=200,
              type='future'):
        d = {}
        # 指数清单
        # 提前日期

        #start = self.getPreDate(period=period, num=pre, start=start)
        preDay = 1 if (pre == 0 or period == 'tick') else self.getPreDays(
            period, pre)
        # 开始日期
        if start is None:
            start = public.getDate(diff=-preDay)
        else:
            start = public.getDate(diff=-preDay, start=start.split(' ')[0])

        if end is None:
            end = public.getDate(diff=1)

        # 查询K线
        if period.find('d') > -1 and len(codes) > 1:
            # 替换查询线
            newCodes = self.alterCode(codes)
            res = rq.get_price(newCodes,
                               frequency=period,
                               start_date=start,
                               end_date=end,
                               adjust_type='pre')

            for c in newCodes:
                mc = c if not c in self.indexMap else self.indexMap[c]
                d[mc] = res.minor_xs(c)

        else:

            iMap = self.indexMap
            for c in codes:
                c0 = c1 = c
                if type == 'future':
                    c0 = self.parseCode(c)
                    c1 = iMap[c0] if c0 in iMap else c
                #print(c1)
                d[c] = d[c0] = rq.get_price(c1,
                                            frequency=period,
                                            start_date=start,
                                            end_date=end,
                                            adjust_type='pre')

        return d
Esempio n. 3
0
    def perf_update(self, weights, start_date, end_date):

        if self.kickout_list is not None or self.st_list is not None or self.suspended_list is not None:
            elimination_list = self.kickout_list+self.st_list+self.suspended_list
        else:
            elimination_list = None
        if self.clean_equity_list is None:
            sample_list = self.el
        else:
            sample_list = self.clean_equity_list+elimination_list
        if self.et is 'funds':
            period_prices = rqdatac.fund.get_nav(sample_list, start_date, end_date, fields='acc_net_value')
        elif self.et is 'stocks':
            period_prices = rqdatac.get_price(sample_list, start_date, end_date, frequency='1d', fields='close')
        period_daily_return_pct_change = pd.DataFrame(period_prices.pct_change())[1:]
        new_daily_arithmetic_return = period_daily_return_pct_change.multiply(weights).sum(axis=1)
        if self.daily_arithmetic_return is None:
            self.daily_arithmetic_return = new_daily_arithmetic_return
        else:
            self.daily_arithmetic_return = self.daily_arithmetic_return.append(new_daily_arithmetic_return)
        new_daily_cum_log_return = np.log(new_daily_arithmetic_return + 1).cumsum()
        if self.daily_cum_log_return is None:
            self.daily_cum_log_return = new_daily_cum_log_return
        else:
            self.daily_cum_log_return = self.daily_cum_log_return.append(new_daily_cum_log_return+self.daily_cum_log_return[-1])
        temp = np.log(self.daily_arithmetic_return + 1)
        self.annualized_vol = sqrt(244) * temp.std()
        days_count = (self.daily_arithmetic_return.index[-1] - self.daily_arithmetic_return.index[0]).days
        self.annualized_return = (self.daily_cum_log_return[-1] + 1) ** (365 / days_count) - 1
Esempio n. 4
0
def init_daily_lib():
    try:
        daily_lib = arctic['daily']
        print('The daily library is already exists.')
        return
    except LibraryNotFoundException:
        arctic.initialize_library('daily', lib_type=CHUNK_STORE)

    daily_lib = arctic['daily']
    start_date = '2000-01-04'

    for sid in tqdm(all_sid()):
        try:
            df = rq.get_price(
                sid,
                start_date=start_date,
                end_date=date.today(),
                frequency='1d',
                adjust_type='post')
            df.index.name = 'date'
            if len(df) > 0:
                daily_lib.write(sid, df, chunk_size='D')
        except Exception as e:
            # TODO: add logger later
            print(f'{sid}: {str(e)}')
Esempio n. 5
0
def get_earnings_to_price_ratio(latest_trading_date, recent_report_type,
                                market_cap_on_current_day):

    net_profit_ttm = get_ttm_sum(
        rqdatac.financials.income_statement.profit_before_tax,
        recent_report_type)

    stock_list = net_profit_ttm.index.tolist()

    stock_price = rqdatac.get_price(stock_list,
                                    start_date=latest_trading_date,
                                    end_date=latest_trading_date,
                                    fields='close',
                                    adjust_type='none').T

    shares = rqdatac.get_shares(stock_list,
                                start_date=latest_trading_date,
                                end_date=latest_trading_date,
                                fields='total').T

    earning_to_price = net_profit_ttm / (stock_price *
                                         shares)[str(latest_trading_date)]

    processed_earning_to_price = winsorization_and_market_cap_weighed_standardization(
        earning_to_price, market_cap_on_current_day[earning_to_price.index])

    return processed_earning_to_price
Esempio n. 6
0
 def __call__(self, path, fields, **kwargs):
     with h5py.File(path, 'w') as h5:
         i, step = 0, 300
         while True:
             order_book_ids = self._order_book_ids[i:i + step]
             df = rqdatac.get_price(order_book_ids,
                                    START_DATE,
                                    datetime.date.today(),
                                    '1d',
                                    adjust_type='none',
                                    fields=fields,
                                    expect_df=True)
             df.reset_index(inplace=True)
             df['datetime'] = [convert_date_to_int(d) for d in df['date']]
             del df['date']
             df.set_index(['order_book_id', 'datetime'], inplace=True)
             df.sort_index(inplace=True)
             for order_book_id in df.index.levels[0]:
                 h5.create_dataset(order_book_id,
                                   data=df.loc[order_book_id].to_records(),
                                   **kwargs)
             i += step
             yield len(order_book_ids)
             if i >= len(self._order_book_ids):
                 break
Esempio n. 7
0
def init_minute1_lib():
    try:
        minute1_lib = arctic['minute1']
        print('The minute1 library is already exists.')
        return
    except LibraryNotFoundException:
        arctic.initialize_library('minute1')

    minute1_lib = arctic['minute1']
    start_date = '2018-01-01'

    for sid in tqdm(all_sid()):
        try:
            df = rq.get_price(sid,
                              start_date=start_date,
                              end_date=date.today(),
                              frequency='1m',
                              adjust_type='post')
            df.index.name = 'date'
            if len(df) > 0:
                minute1_lib.write(sid, df)
        except Exception as e:
            # TODO: add logger later
            print(f'{sid}: {str(e)}')
    minute1_lib.snapshot(str(date.today()))
Esempio n. 8
0
 def compute(self, begin_date=None, end_date=None):
     begin_date = datetime.strptime(begin_date,
                                    '%Y%m%d') - timedelta(days=20)
     all_filter_codes = filter_stock_pool(begin_date)
     for code in all_filter_codes:
         df_daily = rqd.get_price(code, begin_date, end_date)
         df_daily['ma10'] = df_daily['close'].rolling(10).mean()
         df_daily['delta'] = df_daily['close'] - df_daily['ma10']
         df_daily['delta_pre'] = df_daily['delta'].shift(1)
         df_daily = df_daily[(df_daily['delta'] < 0)
                             & (df_daily['delta_pre'] > 0)]
         update_requests = []
         for date in df_daily.index:
             date = date.strftime('%Y%m%d')
             update_requests.append(
                 UpdateOne({
                     'code': code,
                     'date': date
                 }, {'$set': {
                     'code': code,
                     'date': date
                 }},
                           upsert=True))
         if len(update_requests) > 0:
             update_result = self.collection.bulk_write(update_requests)
             print('保存-%s-%s数据 , 插入:%4d , 更新:%4d' %
                   (code, self.name, update_result.upserted_count,
                    update_result.modified_count),
                   flush=True)
Esempio n. 9
0
def get_momentum(stock_list, date, market_cap_on_current_day):

    trading_date_525_before = rqdatac.get_trading_dates(date - timedelta(days=1000), date, country='cn')[-525]

    trading_date_21_before = rqdatac.get_trading_dates(date - timedelta(days=40), date, country='cn')[-21]

    # 共需要 525 - 21 = 504 个交易日的收益率

    exp_weight = get_exponential_weight(half_life=126, length=504)

    # 提取股票价格数据,对于退市情况,考虑作股价向前填补(日收益率为0)

    daily_return = rqdatac.get_price(stock_list, trading_date_525_before, trading_date_21_before, frequency='1d',
                                     fields='close').fillna(method='ffill').pct_change()[1:]

    # 剔除收益率数据存在空值的股票

    inds = daily_return.isnull().sum()[daily_return.isnull().sum() > 0].index

    daily_return = daily_return.drop(daily_return[inds], axis=1)

    # 把复利无风险日收益率转为日收益率

    compounded_risk_free_return = rqdatac.get_yield_curve(start_date=trading_date_525_before, end_date=date, tenor='0S')

    risk_free_return = (((1 + compounded_risk_free_return) ** (1 / 365)) - 1).loc[daily_return.index]

    relative_strength = np.log(1 + daily_return).T.subtract(np.log(1 + risk_free_return.iloc[:, 0])).dot(exp_weight)

    processed_relative_strength = winsorization_and_market_cap_weighed_standardization(relative_strength,
                                                                                       market_cap_on_current_day[
                                                                                           relative_strength.index])

    return processed_relative_strength
Esempio n. 10
0
def download_minute_bar(vt_symbol):
    """下载某一合约的分钟线数据"""
    print(f"开始下载合约数据{vt_symbol}")
    symbol, exchange = vt_symbol.split(".")

    start = time()

    df = rq.get_price(symbol,
                      frequency="1m",
                      fields=FIELDS,
                      start_date='20100416',
                      end_date='20190416')

    bars = []
    for ix, row in df.iterrows():
        bar = generate_bar_from_row(row, symbol, exchange)
        bars.append(bar)

    database_manager.save_bar_data(bars)

    end = time()
    cost = (end - start) * 1000

    print("合约%s的分钟K线数据下载完成%s - %s,耗时%s毫秒" %
          (symbol, df.index[0], df.index[-1], cost))
Esempio n. 11
0
def downloadDailyBarBySymbol(symbol):
    """下载某一合约日线数据"""
    start = time()

    cl = db2[symbol]
    cl.ensure_index([('datetime', ASCENDING)], unique=True)  # 添加索引

    #df = rq.get_price(symbol, frequency='1d', fields=FIELDS, start_date='1900-1-1', end_date= '2013-01-04' )
    df = rq.get_price(symbol,
                      frequency='1d',
                      fields=FIELDS,
                      start_date='2013-1-4',
                      end_date=datetime.now().strftime('%Y%m%d'))
    #end_date=datetime.now().strftime('%Y%m%d'))

    for ix, row in df.iterrows():
        bar = generateVtBar(row, symbol)
        d = bar.__dict__
        flt = {'datetime': bar.datetime}
        cl.replace_one(flt, d, True)

    end = time()
    cost = (end - start) * 1000
    if df.empty == True:
        pass
    else:
        print(u'合约%s数据下载完成%s - %s,耗时%s毫秒' %
              (symbol, df.index[0], df.index[-1], cost))
Esempio n. 12
0
def get_cash_earnings_to_price_ratio(latest_trading_date, recent_report_type,
                                     market_cap_on_current_day):

    cash_ttm = get_ttm_sum(
        rqdatac.financials.cash_flow_statement.
        cash_flow_from_operating_activities, recent_report_type)

    stock_list = cash_ttm.index.tolist()

    stock_price = rqdatac.get_price(stock_list,
                                    start_date=latest_trading_date,
                                    end_date=latest_trading_date,
                                    fields='close',
                                    adjust_type='none').T

    shares = rqdatac.get_shares(stock_list,
                                start_date=latest_trading_date,
                                end_date=latest_trading_date,
                                fields='total').T

    cash_earning_to_price = cash_ttm / (stock_price *
                                        shares)[str(latest_trading_date)]

    processed_cash_earning_to_price = winsorization_and_market_cap_weighed_standardization(
        cash_earning_to_price,
        market_cap_on_current_day[cash_earning_to_price.index])

    return processed_cash_earning_to_price
Esempio n. 13
0
def update_minute_lib():
    minute_lib = arctic['minute']
    last_index = minute_lib.read(
        '000001.XSHG', columns=['close'], chunk_range=('2018-09-20',
                                                       None)).index[-1]
    if last_index.time() == time(15):
        start_date = rq.get_next_trading_date(last_index)
    else:
        start_date = last_index.date()

    for sid in tqdm(all_sid()):
        try:
            df = rq.get_price(
                sid,
                start_date=start_date,
                end_date=date.today(),
                frequency='1m',
                adjust_type='post')
            df.index.name = 'date'
            if len(df) > 0:
                minute_lib.update(
                    sid, df, chunk_range=(start_date, None), upsert=True)
        except Exception as e:
            # TODO: add logger later
            print(f'{sid}: {str(e)}')
Esempio n. 14
0
def update_minute1_lib():
    minute1_lib = arctic['minute1']
    last_index = minute1_lib.read('000001.XSHG').data.index[-1]

    if last_index.time() == time(15):
        start_date = rq.get_next_trading_date(last_index)

        if not rq.get_trading_dates(start_date, date.today()):
            print('DB is already up to date.')
            return

        for sid in tqdm(all_sid()):
            try:
                df = rq.get_price(sid,
                                  start_date=start_date,
                                  end_date=date.today(),
                                  frequency='1m',
                                  adjust_type='post')
                df.index.name = 'date'
                if len(df) > 0:
                    minute1_lib.append(sid, df, upsert=True)
            except Exception as e:
                # TODO: add logger later
                print(f'{sid}: {str(e)}')
    else:
        # read previous version, and get last index to compute start date
        previous_version = minute1_lib.list_versions('000001.XSHG')[1]
        last_index = minute1_lib.read(
            '000001.XSHG', as_of=previous_version['version']).data.index[-1]
        start_date = rq.get_next_trading_date(last_index)

        for sid in tqdm(all_sid()):
            try:
                df = rq.get_price(sid,
                                  start_date=start_date,
                                  end_date=date.today(),
                                  frequency='1m',
                                  adjust_type='post')
                df.index.name = 'date'
                pre_version_number = minute1_lib.list_versions(
                    sid)[1]['version']
                minute1_lib.restore_version(sid, pre_version_number)
                if len(df) > 0:
                    minute1_lib.append(sid, df, upsert=True)
            except Exception as e:
                # TODO: add logger later
                print(f'{sid}: {str(e)}')
def get_main_contract_price(future):
    future_name = future + "88"
    test_data = rq.get_price(future_name,
                             end_date=time_of_today,
                             frequency='1m')
    test_data = pd.DataFrame(test_data)
    save_file_name = future + "0000" + ".csv"
    test_data.to_csv(DATA_PATH + save_file_name)
Esempio n. 16
0
def suspended_stocks_filter(stocks,date):
    """
    :param stocks:股票列表
    :param date: 检验停牌日期
    :return: list
    """
    volume = rqdatac.get_price(stocks,start_date=date,end_date=date,fields="volume").iloc[0]
    return volume[volume>0].index.tolist()
Esempio n. 17
0
def update_future(message=None):
    all_instruments = rq.all_instruments(type="Future")
    sql.insert(all_instruments, "future/contract", "replace")
    futures = ["CU", "M", "SR"]
    for i in futures:
        data = all_instruments[all_instruments.underlying_symbol == i]
        data = data[data.symbol.str.contains("主力连续")]
        ids = data.order_book_id
        for id in ids:
            if message:
                message.append(u"正在更新期货合约:%s 主力连续日线数据" % id)
                message.update()
            else:
                print(u"正在更新期货合约:%s 主力连续日线数据" % id)
            table = "future/contracts/%s" % id
            if sql.is_table(table):
                if is_future_contract_need_update(table):
                    start_date_string = get_started_date(table)
                    df = rq.get_price(id,
                                      start_date=start_date_string,
                                      end_date=TODAY_STR,
                                      frequency='1d')
                    if not sql.insert(
                            df, table, "append", index=True,
                            index_label="date"):
                        df = rq.get_price(id,
                                          start_date=START_DATE_STR,
                                          end_date=TODAY_STR,
                                          frequency='1d')
                        sql.insert(df,
                                   table,
                                   "replace",
                                   index=True,
                                   index_label="date")
            else:
                df = rq.get_price(id,
                                  start_date=START_DATE_STR,
                                  end_date=TODAY_STR,
                                  frequency='1d')
                sql.insert(df,
                           table,
                           "replace",
                           index=True,
                           index_label="date")

    return
Esempio n. 18
0
def get_implicit_factor_return(date):

    latest_trading_date = str(
        rqdatac.get_previous_trading_date(
            datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)))

    previous_trading_date = str(
        rqdatac.get_previous_trading_date(latest_trading_date))

    # 取前一交易日全市场已经上市的股票,保证日收益率计算

    stock_list = rqdatac.all_instruments(
        type='CS', date=previous_trading_date)['order_book_id'].tolist()

    # 剔除上市不满21天的股票
    trading_date_21_days_before = str(
        rqdatac.get_previous_trading_date(latest_trading_date,
                                          country='cn',
                                          n=21))

    stock_list = [
        i for i in stock_list
        if rqdatac.instruments(i).listed_date <= trading_date_21_days_before
    ]

    # 剔除ST股
    is_st_df = rqdatac.is_st_stock(stock_list,
                                   start_date=previous_trading_date,
                                   end_date=previous_trading_date)

    is_st_df.index = is_st_df.index.astype(str)

    stock_list = is_st_df.loc[previous_trading_date][
        is_st_df.loc[previous_trading_date].values == False].index.tolist()

    # 剔除停牌股
    trading_volume = rqdatac.get_price(stock_list,
                                       start_date=previous_trading_date,
                                       end_date=previous_trading_date,
                                       frequency='1d',
                                       fields='volume',
                                       country='cn')

    stock_list = trading_volume.loc[previous_trading_date][
        trading_volume.loc[previous_trading_date].values > 0].index.tolist()

    # 计算全市场前一交易日的行业暴露度

    factor_exposure = get_exposure(stock_list, str(previous_trading_date))

    # 根据上述四类暴露度计算因子收益率

    factor_returns = factor_return_estimation(latest_trading_date,
                                              factor_exposure)

    return factor_returns
Esempio n. 19
0
    def data_preprocessing(self, equity_list, start_date, end_date):

        if self.et is 'funds':
            period_data = rqdatac.fund.get_nav(equity_list, start_date, end_date, fields='acc_net_value')
        elif self.et is 'stocks':
            period_data = rqdatac.get_price(equity_list, start_date, end_date, frequency='1d', fields=['close', 'volume'])
        period_prices = period_data['close']
        period_volume = period_data['volume']
        self.period_prices = period_prices
        # Set up the threshhold of elimination
        out_threshold = ceil(period_prices.shape[0] / 2)
        reset_end_date = pd.to_datetime(end_date)
        reset_start_date = pd.to_datetime(start_date)
        kickout_list = list()
        suspended_list = list()
        # Locate the first valid value of each column, if available sequence length is less than threshhold, add
        # the column name into out_list; if sequence length is longer than threshold but less than chosen period length,
        # reset the start_date to the later date. The latest start_date whose sequence length is greater than threshold
        # will be chose.
        # Check whether any stocks has long suspended trading periods or has been delisted and generate list
        # for such stocks
        for i in equity_list:
            if not period_volume.loc[:, i].value_counts().empty:
                if ((reset_end_date - period_prices.loc[:, i].first_valid_index()) / np.timedelta64(1, 'D')) < out_threshold:
                    kickout_list.append(i)
                elif period_prices.loc[:, i].first_valid_index() < reset_start_date:
                    reset_start_date = period_prices.loc[:, i].first_valid_index()
                elif period_volume.loc[:, i].last_valid_index() < reset_end_date or \
                                period_volume.loc[:, i].value_counts().iloc[0] >= out_threshold:
                    suspended_list.append(i)
            else:
                kickout_list.append(i)
        # Check whether any ST stocks are included and generate a list for ST stocks
        st_list = list(period_prices.columns.values[rqdatac.is_st_stock(equity_list, reset_start_date, reset_end_date).sum(axis=0)>0])
        # Generate final kickout list which includes all the above

        # kickout_list_s = set(kickout_list)
        # st_list_s = set(st_list)
        # suspended_list_s = set(suspended_list)
        # two_list_union = st_list_s.union(suspended_list_s)
        # final_dif = two_list_union - kickout_list_s
        # final_kickout_list = kickout_list + list(final_dif)
        final_kickout_list = list(set().union(kickout_list, st_list, suspended_list))

        # Generate clean data
        equity_list_s = set(equity_list)
        final_kickout_list_s = set(final_kickout_list)
        clean_equity_list = list(equity_list_s - final_kickout_list_s)
        clean_period_prices = period_prices.loc[reset_start_date:reset_end_date, clean_equity_list]
        self.clean_period_prices = clean_period_prices
        self.clean_equity_list = list(clean_period_prices.columns.values)
        self.kickout_list = kickout_list
        self.st_list = st_list
        self.suspended_list = suspended_list
        self.reset_start_d = reset_start_date
        self.reset_end_d = reset_end_date
Esempio n. 20
0
def get_option_price_each_day(_date, all_ids_) -> pd.Series:
    price_ = rqdatac.get_price(all_ids_, _date, _date, expect_df=True)
    if price_ is not None:
        price_ = price_['close'].reset_index(level=1,
                                             drop=True).rename('option_price')
        msg = 'Option price data missing'
        check_if_missing_items(price_.index.tolist(), all_ids_, msg)
        return price_
    else:
        return pd.Series()
Esempio n. 21
0
def get_cumulative_range(stock_list, date, market_cap_on_current_day):

    trading_date_253_before = rqdatac.get_trading_dates(date -
                                                        timedelta(days=500),
                                                        date,
                                                        country='cn')[-253]

    daily_return = rqdatac.get_price(
        stock_list,
        trading_date_253_before,
        date,
        frequency='1d',
        fields='close').fillna(method='ffill').pct_change()[1:]

    # 剔除收益率数据存在空值的股票

    inds = daily_return.isnull().sum()[daily_return.isnull().sum() > 0].index

    daily_return = daily_return.drop(daily_return[inds], axis=1)

    # 把复利无风险日收益率转为日收益率

    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=trading_date_253_before, end_date=date, tenor='3M')

    risk_free_return = (((1 + compounded_risk_free_return)**(1 / 365)) -
                        1).loc[daily_return.index]

    # 每21个交易日为一个时间区间

    spliting_points = np.arange(0, 273, 21)

    cumulative_return = pd.DataFrame()

    for period in range(1, len(spliting_points)):

        compounded_return = (
            (1 + daily_return.iloc[spliting_points[0]:spliting_points[period]]
             ).cumprod() - 1).iloc[-1]

        compounded_risk_free_return = (
            (1 +
             risk_free_return.iloc[spliting_points[0]:spliting_points[period]]
             ).cumprod() - 1).iloc[-1]

        cumulative_return[period] = np.log(1 + compounded_return).subtract(
            np.log(1 + compounded_risk_free_return.iloc[0]))

    cumulative_return = cumulative_return.cumsum(axis=1)

    processed_cumulative_range = winsorization_and_market_cap_weighed_standardization(
        cumulative_return.T.max() - cumulative_return.T.min(),
        market_cap_on_current_day)

    return processed_cumulative_range
def data_process(order_book_ids, equity_type, start_date):

    windows = 132
    end_date = rqdatac.get_previous_trading_date(start_date)
    for i in range(windows + 1):
        start_date = rqdatac.get_previous_trading_date(start_date)

    if equity_type is 'funds':
        period_data = rqdatac.fund.get_nav(order_book_ids,
                                           start_date,
                                           end_date,
                                           fields='acc_net_value')
    elif equity_type is 'stocks':
        period_data = rqdatac.get_price(order_book_ids,
                                        start_date,
                                        end_date,
                                        frequency='1d',
                                        fields=['close', 'volume'])
    period_prices = period_data['close']
    period_volume = period_data['volume']
    # Set up the threshhold of elimination
    out_threshold = ceil(period_prices.shape[0] / 2)
    kickout_list = list()
    suspended_list = list()
    # Locate the first valid value of each column, if available sequence length is less than threshhold, add
    # the column name into out_list; if sequence length is longer than threshold but less than chosen period length,
    # reset the start_date to the later date. The latest start_date whose sequence length is greater than threshold
    # will be chose.
    # Check whether any stocks has long suspended trading periods or has been delisted and generate list
    # for such stocks
    for i in order_book_ids:
        if not period_volume.loc[:, i].value_counts().empty:
            if ((end_date - period_prices.loc[:, i].first_valid_index()) / np.timedelta64(1, 'D')) \
                    < out_threshold:
                kickout_list.append(i)
            elif period_prices.loc[:, i].first_valid_index() < start_date:
                reset_start_date = period_prices.loc[:, i].first_valid_index()
            elif period_volume.loc[:, i].last_valid_index() < end_date or \
                            period_volume.loc[:, i].value_counts().iloc[0] >= out_threshold:
                suspended_list.append(i)
        else:
            kickout_list.append(i)
    # Check whether any ST stocks are included and generate a list for ST stocks
    st_list = list(period_prices.columns.values[rqdatac.is_st_stock(
        order_book_ids, reset_start_date, end_date).sum(axis=0) > 0])
    # Generate final kickout list which includes all the above
    final_kickout_list = list(set().union(kickout_list, st_list,
                                          suspended_list))
    # Generate clean data
    order_book_ids_s = set(order_book_ids)
    final_kickout_list_s = set(final_kickout_list)
    clean_order_book_ids = list(order_book_ids_s - final_kickout_list_s)
    clean_period_prices = period_prices.loc[reset_start_date:end_date,
                                            clean_order_book_ids]
    return clean_period_prices, final_kickout_list
Esempio n. 23
0
def get_daily_excess_return(stock_list, start_date, end_date):

    # 提取股票价格数据,对于退市情况,考虑作股价向前填补(日收益率为0)

    stock_daily_return = rqdatac.get_price(
        stock_list,
        rqdatac.get_previous_trading_date(start_date),
        end_date,
        frequency='1d',
        fields='close').fillna(method='ffill').pct_change()[1:]

    # 剔除收益率数据存在空值的股票

    inds = stock_daily_return.isnull().sum()[
        stock_daily_return.isnull().sum() > 0].index

    filtered_stock_daily_return = stock_daily_return.drop(inds, axis=1)

    # 经测试发现,中证全指(000985)作为 market portfolio 的效果最好

    market_portfolio_daily_return = rqdatac.get_price(
        '000985.XSHG',
        rqdatac.get_previous_trading_date(start_date),
        end_date,
        frequency='1d',
        fields='close').pct_change()[1:]

    # 计算无风险日收益率

    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=start_date, end_date=end_date, tenor='3M')

    risk_free_return = (((1 + compounded_risk_free_return)**(1 / 365)) -
                        1).loc[filtered_stock_daily_return.index]

    daily_excess_return = filtered_stock_daily_return.T.subtract(
        risk_free_return.iloc[:, 0]).T

    market_portfolio_daily_excess_return = market_portfolio_daily_return.subtract(
        risk_free_return.iloc[:, 0])

    return daily_excess_return, market_portfolio_daily_excess_return
Esempio n. 24
0
def get_tick(contract_id, start_date, end_date):
    """
    tick数据下载
    :param contract_id:
    :param start_date:
    :param end_date:
    :return:
    """
    df_tick = rq.get_price(contract_id, start_date=start_date, end_date=end_date, frequency='tick', fields=None)

    return df_tick.between_time('09:25:00', '15:10:00')
Esempio n. 25
0
def get_stock_beta(stock_list, stock_excess_return, benchmark,
                   latest_trading_date, market_cap_on_current_day):

    trading_date_253_before = rqdatac.get_trading_dates(latest_trading_date -
                                                        timedelta(days=500),
                                                        latest_trading_date,
                                                        country='cn')[-253]

    exp_weight = get_exponential_weight(half_life=63, length=252)

    weighted_stock_excess_return = stock_excess_return.T.multiply(exp_weight).T

    compounded_risk_free_return = rqdatac.get_yield_curve(
        start_date=trading_date_253_before,
        end_date=latest_trading_date,
        tenor='3M')

    risk_free_return = (((1 + compounded_risk_free_return)**(1 / 365)) -
                        1).loc[stock_excess_return.index]

    market_portfolio_daily_return = rqdatac.get_price(
        benchmark,
        trading_date_253_before,
        latest_trading_date,
        frequency='1d',
        fields='close').fillna(method='ffill').pct_change()[1:]

    market_portfolio_excess_return = market_portfolio_daily_return.subtract(
        risk_free_return.iloc[:, 0])

    weighted_market_portfolio_excess_return = market_portfolio_excess_return.multiply(
        exp_weight).T

    weighted_market_portfolio_variance = weighted_market_portfolio_excess_return.var(
    )

    beta = [
        weighted_market_portfolio_excess_return.cov(
            weighted_stock_excess_return[stock]) /
        weighted_market_portfolio_variance
        for stock in stock_excess_return.columns
    ]

    stock_beta = pd.Series(beta, index=stock_excess_return.columns)

    # 用回归方法处理 beta 的缺失值

    imputed_stock_beta = individual_factor_imputation(
        stock_list, stock_beta, market_cap_on_current_day,
        latest_trading_date.strftime('%Y-%m-%d'))

    return imputed_stock_beta
Esempio n. 26
0
def future_data_load(args, start_date=None, end_date=None):
    """
    期货分钟数据
    :param args: 合约list or 合约str
    :param start_date: 开始日期
    :param end_date: 结束日期
    :return:
    """
    if type(args) is str:
        contract_id = args
        data_dict = {contract_id: pd.DataFrame()}
        try:
            contract_data = rq.get_price(contract_id,
                                         start_date=start_date,
                                         end_date=end_date,
                                         frequency='1m',
                                         fields=None)
        except AttributeError:
            print(contract_id)
            raise AttributeError

        data_dict[contract_id] = contract_data
    else:
        data_dict = dict()

        for contract_id in args:
            try:
                contract_data = rq.get_price(contract_id,
                                             start_date=start_date,
                                             end_date=end_date,
                                             frequency='1m',
                                             fields=None)
            except AttributeError:
                print(contract_id)
                raise AttributeError
            data_dict[contract_id] = contract_data

    return data_dict
Esempio n. 27
0
def update_future():
    SQL = create_engine('postgresql://*****:*****@#[email protected]:5432/future')
    all_instruments = rq.all_instruments(type="Future")
    all_instruments.to_sql("contract", SQL, if_exists='replace')
    futures = ["CU", "M", "SR"]
    for i in futures:
        data = all_instruments[all_instruments.underlying_symbol == i]
        data = data[data.symbol.str.contains("主力连续")]
        ids = data.order_book_id
        for id in ids:
            table = "daily_%s" % id
            if is_table_exist(table, SQL):
                start_date_string = get_started_date(table, SQL)
                if start_date_string:
                    df = rq.get_price(id, start_date=start_date_string, end_date=TODAY_STR, frequency='1d')
                    write(df, table, SQL, if_exists='append')
                    print(u"已更新期货合约:%s 主力连续日线数据, 开始时间: %s, 结束时间: %s" % (id, start_date_string, TODAY_STR))
            else:
                df = rq.get_price(id, start_date=START_DATE_STR, end_date=TODAY_STR, frequency='1d')
                write(df, table, SQL, if_exists='replace')
                print(u"已更新期货合约:%s 主力连续日线数据, 开始时间: %s, 结束时间: %s" % (id, START_DATE_STR, TODAY_STR))

    return
Esempio n. 28
0
def update_stock():

    #SQL = create_engine('postgresql://*****:*****@#[email protected]:5432/stock')
    SQL = create_engine('postgresql://*****:*****@#[email protected]:5432/stock')
    all_instruments = rq.all_instruments(type="CS")
    all_instruments.to_sql("contract", SQL, if_exists='replace')

    contracts_df = pd.read_sql_query('select * from "contract"', con=SQL)
    ids = contracts_df["order_book_id"]
    for id in ids:
        table = "daily_%s" % id
        if is_table_exist(table, SQL):
            start_date_string = get_started_date(table, SQL)
            if start_date_string:
                df = rq.get_price(id, start_date=start_date_string, end_date=TODAY_STR, frequency='1d')
                write(df, table, SQL, if_exists='append')
                print(u"已更新股票:%s 日线行情,开始时间: %s, 结束时间: %s" % (id, start_date_string, TODAY_STR))
        else:
            df = rq.get_price(id, start_date=START_DATE_STR, end_date=TODAY_STR, frequency='1d')
            write(df, table, SQL, if_exists='replace')
            print(u"已更新股票:%s 日线行情,开始时间: %s, 结束时间: %s" % (id, START_DATE_STR, TODAY_STR))

    return
Esempio n. 29
0
def get_price_and_save_to_csv(a, temp):
    temp_path = DATA_PATH + temp
    for name, group in a:
        future_name = name
        start_date = group.index[0].strftime('%Y%m%d')
        end_date = group.index[-1].strftime('%Y%m%d')
        print future_name
        print start_date
        print end_date
        test_data = rq.get_price(future_name,
                                 start_date=start_date,
                                 end_date=end_date,
                                 frequency='1m')
        test_data = pd.DataFrame(test_data)
        save_file_name = future_name + ".csv"
        test_data.to_csv(temp_path + "\\" + save_file_name)
Esempio n. 30
0
def get_explicit_factor_returns(date):
    """
    :param date:日期
    :return: pandas.Series
    """

    previous_trading_date = rqdatac.get_previous_trading_date(date)

    all_a_stocks = rqdatac.all_instruments(type="CS",date=previous_trading_date).order_book_id.tolist()
    filtered_stocks = noisy_stocks_filter(all_a_stocks,previous_trading_date)
    # print(all_a_stocks,previous_trading_date)
    factor_exposures = rqdatac.get_style_factor_exposure(all_a_stocks, previous_trading_date, previous_trading_date, "all").sort_index()
    factor_exposures.index=factor_exposures.index.droplevel(1)

    closePrice = rqdatac.get_price(all_a_stocks, rqdatac.get_previous_trading_date(previous_trading_date),
                                   previous_trading_date, fields="close")
    priceChange = closePrice.pct_change().iloc[-1]

    index_mapping = {"csi_300":'000300.XSHG',"csi_500":"000905.XSHG","csi_800":"000906.XSHG"}
    all_stocks = {index:rqdatac.index_components(index_mapping.get(index),date=previous_trading_date) for index in index_mapping}
    all_stocks['whole_market'] = filtered_stocks

    def _calc_explicitReturns_with_stocksList(stocksList):
        # 根据股票池计算收益率
        _sizeBeta = factor_exposures[['size','beta']].loc[stocksList]

        _quantileGroup = _sizeBeta.apply(lambda x:pd.cut(x,bins=3,labels=False)+1).reset_index()
        _quantileStocks = _quantileGroup.groupby(['size','beta']).apply(lambda x:x.index.tolist())
        market_neutralize_stocks = _quantileStocks.apply(
            lambda x: pd.Series(stocksList).loc[x].values.tolist()).values.tolist()
        return factor_exposures.loc[stocksList].apply(lambda x,y=market_neutralize_stocks:_calc_single_explicit_returns(x,y))

    def _calc_single_explicit_returns(_factor_exposure,market_neutralize_stocks):
        # 计算单一因子收益率
        def _deuce(series):
            median = series.median()
            return [series[series<=median].index.tolist(),series[series>median].index.tolist()]

        deuceResults = np.array([_deuce(_factor_exposure[neutralized_stks]) for neutralized_stks in market_neutralize_stocks]).flatten()

        short_stocksList = list(reduce(lambda x,y:set(x)|set(y),np.array([s for i,s in enumerate(deuceResults) if i%2==0])))
        long_stockList = list(reduce(lambda x,y:set(x)|set(y),np.array([s for i,s in enumerate(deuceResults) if i%2==1])))

        return priceChange[long_stockList].mean() - priceChange[short_stocksList].mean()

    results = {key: _calc_explicitReturns_with_stocksList(all_stocks.get(key)) for key in all_stocks}
    return pd.DataFrame(results)[['whole_market','csi_300','csi_500','csi_800']]
Esempio n. 31
0
def downloadDailyBarBySymbol(symbol):
    """下载某一合约日线数据"""
    start = time()

    cl = db2[symbol]
    cl.ensure_index([('datetime', ASCENDING)], unique=True)         # 添加索引
    
    df = rq.get_price(symbol, frequency='1d', fields=FIELDS, end_date=datetime.now().strftime('%Y%m%d'))
    
    for ix, row in df.iterrows():
        bar = generateVtBar(row, symbol)
        d = bar.__dict__
        flt = {'datetime': bar.datetime}
        cl.replace_one(flt, d, True)            

    end = time()
    cost = (end - start) * 1000

    print(u'合约%s数据下载完成%s - %s,耗时%s毫秒' %(symbol, df.index[0], df.index[-1], cost))
Esempio n. 32
0
def downloadMinuteBarBySymbol(symbol):
    """下载某一合约的分钟线数据"""
    start = time()

    cl = dbMinute[symbol]
    cl.ensure_index([('datetime', ASCENDING)], unique=True)         # 添加索引
    
    df = rq.get_price(symbol, frequency='1m', fields=FIELDS)
    
    for ix, row in df.iterrows():
        bar = generateVtBar(row, symbol)
        d = bar.__dict__
        flt = {'datetime': bar.datetime}
        cl.replace_one(flt, d, True)            

    end = time()
    cost = (end - start) * 1000

    print(u'合约%s的分钟K线数据下载完成%s - %s,耗时%s毫秒' %(symbol, df.index[0], df.index[-1], cost))
Esempio n. 33
0
def download_minute_bar(vt_symbol):
    """下载某一合约的分钟线数据"""
    print(f"开始下载合约数据{vt_symbol}")
    symbol, exchange = vt_symbol.split(".")

    start = time()

    df = rq.get_price(symbol, frequency="1m", fields=FIELDS)

    with DB.atomic():
        for ix, row in df.iterrows():
            print(row.name)
            bar = generate_bar_from_row(row, symbol, exchange)
            DbBarData.replace(bar.__data__).execute()

    end = time()
    cost = (end - start) * 1000

    print(
        "合约%s的分钟K线数据下载完成%s - %s,耗时%s毫秒"
        % (symbol, df.index[0], df.index[-1], cost)
    )
Esempio n. 34
0
def downloadTickBySymbol(symbol, date):
    """下载某一合约日线数据"""
    start = time()

    cl = dbTick[symbol]
    cl.ensure_index([('datetime', ASCENDING)], unique=True)         # 添加索引
    
    df = rq.get_price(symbol, 
                      frequency='tick', 
                      start_date=date, 
                      end_date=date)
    
    for ix, row in df.iterrows():
        tick = generateVtTick(row, symbol)
        d = tick.__dict__
        flt = {'datetime': tick.datetime}
        cl.replace_one(flt, d, True)            

    end = time()
    cost = (end - start) * 1000

    print(u'合约%sTick数据下载完成%s - %s,耗时%s毫秒' %(symbol, df.index[0], df.index[-1], cost))