def update_option(message=None):
    # Replace all contract information
    SQL = create_engine('postgresql://*****:*****@#[email protected]:5432/option')
    all_instruments = rq.all_instruments(type="Option")
    all_instruments.to_sql("contract", SQL, if_exists='replace')
    for id, group in all_instruments.groupby(["underlying_order_book_id"]):
        # Update option contracts
        contracts = options.get_contracts(id)
        for contract in contracts:
            table = "daily_%s" % contract
            if is_table_exist(table, SQL):
                order_book_id_df = all_instruments[all_instruments["order_book_id"] == contract]
                maturity_date_string = order_book_id_df["maturity_date"].values[0]
                maturity_date_datetime = datetime.datetime(year=int(maturity_date_string[0:4]),
                                                           month=int(maturity_date_string[5:7]),
                                                           day=int(maturity_date_string[8:10]))
                if maturity_date_datetime > TODAY:
                    if get_started_date(table, SQL):
                        start_date_string = get_started_date(table, SQL)
                        df = rq.get_price(contract, start_date=start_date_string, end_date=TODAY_STR, frequency='1d')
                        write(df, table, SQL, if_exists='append')
                        print(u"已更新期权:%s 日线行情,开始时间: %s, 结束时间: %s" % (contract, start_date_string, TODAY_STR))
            else:
                df = rq.get_price(contract, start_date=START_DATE_STR, end_date=TODAY_STR, frequency='1d')
                write(df, table, SQL, if_exists='replace')
                print(u"已更新期权:%s 日线行情,开始时间: %s, 结束时间: %s" % (contract, START_DATE_STR, TODAY_STR))
def kline(self, codes, period='1m', start=None, end=None, pre=200, type='future'):
    d = {}  # index list
    # look-back offset
    # start = self.getPreDate(period=period, num=pre, start=start)
    preDay = 1 if (pre == 0 or period == 'tick') else self.getPreDays(period, pre)
    # start date
    if start is None:
        start = public.getDate(diff=-preDay)
    else:
        start = public.getDate(diff=-preDay, start=start.split(' ')[0])
    if end is None:
        end = public.getDate(diff=1)
    # query K-line data
    if period.find('d') > -1 and len(codes) > 1:
        # swap in the query codes
        newCodes = self.alterCode(codes)
        res = rq.get_price(newCodes, frequency=period, start_date=start, end_date=end, adjust_type='pre')
        for c in newCodes:
            mc = c if c not in self.indexMap else self.indexMap[c]
            d[mc] = res.minor_xs(c)
    else:
        iMap = self.indexMap
        for c in codes:
            c0 = c1 = c
            if type == 'future':
                c0 = self.parseCode(c)
                c1 = iMap[c0] if c0 in iMap else c
            # print(c1)
            d[c] = d[c0] = rq.get_price(c1, frequency=period, start_date=start, end_date=end, adjust_type='pre')
    return d
def perf_update(self, weights, start_date, end_date):
    if self.kickout_list is not None or self.st_list is not None or self.suspended_list is not None:
        elimination_list = self.kickout_list + self.st_list + self.suspended_list
    else:
        elimination_list = None
    if self.clean_equity_list is None:
        sample_list = self.el
    else:
        sample_list = self.clean_equity_list + elimination_list
    if self.et == 'funds':
        period_prices = rqdatac.fund.get_nav(sample_list, start_date, end_date, fields='acc_net_value')
    elif self.et == 'stocks':
        period_prices = rqdatac.get_price(sample_list, start_date, end_date, frequency='1d', fields='close')
    period_daily_return_pct_change = pd.DataFrame(period_prices.pct_change())[1:]
    new_daily_arithmetic_return = period_daily_return_pct_change.multiply(weights).sum(axis=1)
    if self.daily_arithmetic_return is None:
        self.daily_arithmetic_return = new_daily_arithmetic_return
    else:
        self.daily_arithmetic_return = self.daily_arithmetic_return.append(new_daily_arithmetic_return)
    new_daily_cum_log_return = np.log(new_daily_arithmetic_return + 1).cumsum()
    if self.daily_cum_log_return is None:
        self.daily_cum_log_return = new_daily_cum_log_return
    else:
        self.daily_cum_log_return = self.daily_cum_log_return.append(
            new_daily_cum_log_return + self.daily_cum_log_return[-1])
    temp = np.log(self.daily_arithmetic_return + 1)
    self.annualized_vol = sqrt(244) * temp.std()
    days_count = (self.daily_arithmetic_return.index[-1] - self.daily_arithmetic_return.index[0]).days
    self.annualized_return = (self.daily_cum_log_return[-1] + 1) ** (365 / days_count) - 1
def init_daily_lib():
    try:
        daily_lib = arctic['daily']
        print('The daily library already exists.')
        return
    except LibraryNotFoundException:
        arctic.initialize_library('daily', lib_type=CHUNK_STORE)
        daily_lib = arctic['daily']
    start_date = '2000-01-04'
    for sid in tqdm(all_sid()):
        try:
            df = rq.get_price(sid, start_date=start_date, end_date=date.today(),
                              frequency='1d', adjust_type='post')
            df.index.name = 'date'
            if len(df) > 0:
                daily_lib.write(sid, df, chunk_size='D')
        except Exception as e:
            # TODO: add logger later
            print(f'{sid}: {str(e)}')
def get_earnings_to_price_ratio(latest_trading_date, recent_report_type, market_cap_on_current_day):
    net_profit_ttm = get_ttm_sum(rqdatac.financials.income_statement.profit_before_tax, recent_report_type)
    stock_list = net_profit_ttm.index.tolist()
    stock_price = rqdatac.get_price(stock_list, start_date=latest_trading_date, end_date=latest_trading_date,
                                    fields='close', adjust_type='none').T
    shares = rqdatac.get_shares(stock_list, start_date=latest_trading_date, end_date=latest_trading_date,
                                fields='total').T
    earning_to_price = net_profit_ttm / (stock_price * shares)[str(latest_trading_date)]
    processed_earning_to_price = winsorization_and_market_cap_weighed_standardization(
        earning_to_price, market_cap_on_current_day[earning_to_price.index])
    return processed_earning_to_price
def __call__(self, path, fields, **kwargs):
    with h5py.File(path, 'w') as h5:
        i, step = 0, 300
        while True:
            order_book_ids = self._order_book_ids[i:i + step]
            df = rqdatac.get_price(order_book_ids, START_DATE, datetime.date.today(), '1d',
                                   adjust_type='none', fields=fields, expect_df=True)
            df.reset_index(inplace=True)
            df['datetime'] = [convert_date_to_int(d) for d in df['date']]
            del df['date']
            df.set_index(['order_book_id', 'datetime'], inplace=True)
            df.sort_index(inplace=True)
            for order_book_id in df.index.levels[0]:
                h5.create_dataset(order_book_id, data=df.loc[order_book_id].to_records(), **kwargs)
            i += step
            yield len(order_book_ids)
            if i >= len(self._order_book_ids):
                break
def init_minute1_lib():
    try:
        minute1_lib = arctic['minute1']
        print('The minute1 library already exists.')
        return
    except LibraryNotFoundException:
        arctic.initialize_library('minute1')
        minute1_lib = arctic['minute1']
    start_date = '2018-01-01'
    for sid in tqdm(all_sid()):
        try:
            df = rq.get_price(sid, start_date=start_date, end_date=date.today(),
                              frequency='1m', adjust_type='post')
            df.index.name = 'date'
            if len(df) > 0:
                minute1_lib.write(sid, df)
        except Exception as e:
            # TODO: add logger later
            print(f'{sid}: {str(e)}')
    minute1_lib.snapshot(str(date.today()))
def compute(self, begin_date=None, end_date=None):
    begin_date = datetime.strptime(begin_date, '%Y%m%d') - timedelta(days=20)
    all_filter_codes = filter_stock_pool(begin_date)
    for code in all_filter_codes:
        df_daily = rqd.get_price(code, begin_date, end_date)
        df_daily['ma10'] = df_daily['close'].rolling(10).mean()
        df_daily['delta'] = df_daily['close'] - df_daily['ma10']
        df_daily['delta_pre'] = df_daily['delta'].shift(1)
        df_daily = df_daily[(df_daily['delta'] < 0) & (df_daily['delta_pre'] > 0)]
        update_requests = []
        for date in df_daily.index:
            date = date.strftime('%Y%m%d')
            update_requests.append(
                UpdateOne({'code': code, 'date': date},
                          {'$set': {'code': code, 'date': date}},
                          upsert=True))
        if len(update_requests) > 0:
            update_result = self.collection.bulk_write(update_requests)
            print('保存-%s-%s数据 , 插入:%4d , 更新:%4d' %
                  (code, self.name, update_result.upserted_count, update_result.modified_count),
                  flush=True)
def get_momentum(stock_list, date, market_cap_on_current_day):
    trading_date_525_before = rqdatac.get_trading_dates(date - timedelta(days=1000), date, country='cn')[-525]
    trading_date_21_before = rqdatac.get_trading_dates(date - timedelta(days=40), date, country='cn')[-21]
    # A total of 525 - 21 = 504 trading days of returns is needed
    exp_weight = get_exponential_weight(half_life=126, length=504)
    # Pull the price series; for delisted stocks, forward-fill the price (daily return of 0)
    daily_return = rqdatac.get_price(stock_list, trading_date_525_before, trading_date_21_before,
                                     frequency='1d', fields='close').fillna(method='ffill').pct_change()[1:]
    # Drop stocks whose return series contains missing values
    inds = daily_return.isnull().sum()[daily_return.isnull().sum() > 0].index
    daily_return = daily_return.drop(inds, axis=1)
    # Convert the compounded risk-free rate into a daily rate
    compounded_risk_free_return = rqdatac.get_yield_curve(start_date=trading_date_525_before, end_date=date,
                                                          tenor='0S')
    risk_free_return = (((1 + compounded_risk_free_return) ** (1 / 365)) - 1).loc[daily_return.index]
    relative_strength = np.log(1 + daily_return).T.subtract(np.log(1 + risk_free_return.iloc[:, 0])).dot(exp_weight)
    processed_relative_strength = winsorization_and_market_cap_weighed_standardization(
        relative_strength, market_cap_on_current_day[relative_strength.index])
    return processed_relative_strength
def download_minute_bar(vt_symbol):
    """Download minute-bar data for a given contract."""
    print(f"开始下载合约数据{vt_symbol}")
    symbol, exchange = vt_symbol.split(".")
    start = time()
    df = rq.get_price(symbol, frequency="1m", fields=FIELDS, start_date='20100416', end_date='20190416')
    bars = []
    for ix, row in df.iterrows():
        bar = generate_bar_from_row(row, symbol, exchange)
        bars.append(bar)
    database_manager.save_bar_data(bars)
    end = time()
    cost = (end - start) * 1000
    print("合约%s的分钟K线数据下载完成%s - %s,耗时%s毫秒" % (symbol, df.index[0], df.index[-1], cost))
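# Minimal usage sketch for download_minute_bar (assumptions: the RQData SDK has already been
# initialized by the surrounding module, and database_manager / FIELDS are configured there;
# the vt_symbol below is purely illustrative).
if __name__ == '__main__':
    download_minute_bar("rb1905.SHFE")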
def downloadDailyBarBySymbol(symbol):
    """Download daily bar data for a given contract."""
    start = time()
    cl = db2[symbol]
    cl.ensure_index([('datetime', ASCENDING)], unique=True)  # add index
    # df = rq.get_price(symbol, frequency='1d', fields=FIELDS, start_date='1900-1-1', end_date='2013-01-04')
    df = rq.get_price(symbol, frequency='1d', fields=FIELDS,
                      start_date='2013-1-4', end_date=datetime.now().strftime('%Y%m%d'))
    for ix, row in df.iterrows():
        bar = generateVtBar(row, symbol)
        d = bar.__dict__
        flt = {'datetime': bar.datetime}
        cl.replace_one(flt, d, True)
    end = time()
    cost = (end - start) * 1000
    if not df.empty:
        print(u'合约%s数据下载完成%s - %s,耗时%s毫秒' % (symbol, df.index[0], df.index[-1], cost))
def get_cash_earnings_to_price_ratio(latest_trading_date, recent_report_type, market_cap_on_current_day):
    cash_ttm = get_ttm_sum(rqdatac.financials.cash_flow_statement.cash_flow_from_operating_activities,
                           recent_report_type)
    stock_list = cash_ttm.index.tolist()
    stock_price = rqdatac.get_price(stock_list, start_date=latest_trading_date, end_date=latest_trading_date,
                                    fields='close', adjust_type='none').T
    shares = rqdatac.get_shares(stock_list, start_date=latest_trading_date, end_date=latest_trading_date,
                                fields='total').T
    cash_earning_to_price = cash_ttm / (stock_price * shares)[str(latest_trading_date)]
    processed_cash_earning_to_price = winsorization_and_market_cap_weighed_standardization(
        cash_earning_to_price, market_cap_on_current_day[cash_earning_to_price.index])
    return processed_cash_earning_to_price
def update_minute_lib():
    minute_lib = arctic['minute']
    last_index = minute_lib.read('000001.XSHG', columns=['close'],
                                 chunk_range=('2018-09-20', None)).index[-1]
    if last_index.time() == time(15):
        start_date = rq.get_next_trading_date(last_index)
    else:
        start_date = last_index.date()
    for sid in tqdm(all_sid()):
        try:
            df = rq.get_price(sid, start_date=start_date, end_date=date.today(),
                              frequency='1m', adjust_type='post')
            df.index.name = 'date'
            if len(df) > 0:
                minute_lib.update(sid, df, chunk_range=(start_date, None), upsert=True)
        except Exception as e:
            # TODO: add logger later
            print(f'{sid}: {str(e)}')
def update_minute1_lib():
    minute1_lib = arctic['minute1']
    last_index = minute1_lib.read('000001.XSHG').data.index[-1]
    if last_index.time() == time(15):
        start_date = rq.get_next_trading_date(last_index)
        if not rq.get_trading_dates(start_date, date.today()):
            print('DB is already up to date.')
            return
        for sid in tqdm(all_sid()):
            try:
                df = rq.get_price(sid, start_date=start_date, end_date=date.today(),
                                  frequency='1m', adjust_type='post')
                df.index.name = 'date'
                if len(df) > 0:
                    minute1_lib.append(sid, df, upsert=True)
            except Exception as e:
                # TODO: add logger later
                print(f'{sid}: {str(e)}')
    else:
        # read previous version, and get last index to compute start date
        previous_version = minute1_lib.list_versions('000001.XSHG')[1]
        last_index = minute1_lib.read('000001.XSHG', as_of=previous_version['version']).data.index[-1]
        start_date = rq.get_next_trading_date(last_index)
        for sid in tqdm(all_sid()):
            try:
                df = rq.get_price(sid, start_date=start_date, end_date=date.today(),
                                  frequency='1m', adjust_type='post')
                df.index.name = 'date'
                pre_version_number = minute1_lib.list_versions(sid)[1]['version']
                minute1_lib.restore_version(sid, pre_version_number)
                if len(df) > 0:
                    minute1_lib.append(sid, df, upsert=True)
            except Exception as e:
                # TODO: add logger later
                print(f'{sid}: {str(e)}')
def get_main_contract_price(future):
    future_name = future + "88"
    test_data = rq.get_price(future_name, end_date=time_of_today, frequency='1m')
    test_data = pd.DataFrame(test_data)
    save_file_name = future + "0000" + ".csv"
    test_data.to_csv(DATA_PATH + save_file_name)
def suspended_stocks_filter(stocks, date):
    """
    :param stocks: list of stocks
    :param date: date on which to check for suspension
    :return: list
    """
    volume = rqdatac.get_price(stocks, start_date=date, end_date=date, fields="volume").iloc[0]
    return volume[volume > 0].index.tolist()
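# Minimal usage sketch for suspended_stocks_filter (assumptions: rqdatac has been initialized
# elsewhere in the module; the order_book_ids and the date below are illustrative only).
if __name__ == '__main__':
    tradable = suspended_stocks_filter(["000001.XSHE", "000002.XSHE"], "2019-04-16")
    print(tradable)  # order_book_ids with non-zero volume on that date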
def update_future(message=None):
    all_instruments = rq.all_instruments(type="Future")
    sql.insert(all_instruments, "future/contract", "replace")
    futures = ["CU", "M", "SR"]
    for i in futures:
        data = all_instruments[all_instruments.underlying_symbol == i]
        data = data[data.symbol.str.contains("主力连续")]
        ids = data.order_book_id
        for id in ids:
            if message:
                message.append(u"正在更新期货合约:%s 主力连续日线数据" % id)
                message.update()
            else:
                print(u"正在更新期货合约:%s 主力连续日线数据" % id)
            table = "future/contracts/%s" % id
            if sql.is_table(table):
                if is_future_contract_need_update(table):
                    start_date_string = get_started_date(table)
                    df = rq.get_price(id, start_date=start_date_string, end_date=TODAY_STR, frequency='1d')
                    if not sql.insert(df, table, "append", index=True, index_label="date"):
                        df = rq.get_price(id, start_date=START_DATE_STR, end_date=TODAY_STR, frequency='1d')
                        sql.insert(df, table, "replace", index=True, index_label="date")
            else:
                df = rq.get_price(id, start_date=START_DATE_STR, end_date=TODAY_STR, frequency='1d')
                sql.insert(df, table, "replace", index=True, index_label="date")
    return
def get_implicit_factor_return(date):
    latest_trading_date = str(rqdatac.get_previous_trading_date(
        datetime.strptime(date, "%Y-%m-%d") + timedelta(days=1)))
    previous_trading_date = str(rqdatac.get_previous_trading_date(latest_trading_date))
    # Use only stocks already listed on the previous trading day, so daily returns can be computed
    stock_list = rqdatac.all_instruments(type='CS', date=previous_trading_date)['order_book_id'].tolist()
    # Drop stocks listed for fewer than 21 trading days
    trading_date_21_days_before = str(rqdatac.get_previous_trading_date(latest_trading_date, country='cn', n=21))
    stock_list = [i for i in stock_list
                  if rqdatac.instruments(i).listed_date <= trading_date_21_days_before]
    # Drop ST stocks
    is_st_df = rqdatac.is_st_stock(stock_list, start_date=previous_trading_date, end_date=previous_trading_date)
    is_st_df.index = is_st_df.index.astype(str)
    stock_list = is_st_df.loc[previous_trading_date][
        is_st_df.loc[previous_trading_date].values == False].index.tolist()
    # Drop suspended stocks
    trading_volume = rqdatac.get_price(stock_list, start_date=previous_trading_date,
                                       end_date=previous_trading_date, frequency='1d',
                                       fields='volume', country='cn')
    stock_list = trading_volume.loc[previous_trading_date][
        trading_volume.loc[previous_trading_date].values > 0].index.tolist()
    # Compute the whole market's industry exposures on the previous trading day
    factor_exposure = get_exposure(stock_list, str(previous_trading_date))
    # Estimate factor returns from the exposures above
    factor_returns = factor_return_estimation(latest_trading_date, factor_exposure)
    return factor_returns
def data_preprocessing(self, equity_list, start_date, end_date):
    if self.et == 'funds':
        period_data = rqdatac.fund.get_nav(equity_list, start_date, end_date, fields='acc_net_value')
    elif self.et == 'stocks':
        period_data = rqdatac.get_price(equity_list, start_date, end_date, frequency='1d',
                                        fields=['close', 'volume'])
    period_prices = period_data['close']
    period_volume = period_data['volume']
    self.period_prices = period_prices
    # Set up the threshold of elimination
    out_threshold = ceil(period_prices.shape[0] / 2)
    reset_end_date = pd.to_datetime(end_date)
    reset_start_date = pd.to_datetime(start_date)
    kickout_list = list()
    suspended_list = list()
    # Locate the first valid value of each column. If the available sequence length is less than the threshold,
    # add the column name to the kickout list; if the sequence length is longer than the threshold but shorter
    # than the chosen period length, reset the start_date to the later date. The latest start_date whose
    # sequence length is greater than the threshold will be chosen.
    # Check whether any stock has long suspended trading periods or has been delisted, and collect such stocks.
    for i in equity_list:
        if not period_volume.loc[:, i].value_counts().empty:
            if ((reset_end_date - period_prices.loc[:, i].first_valid_index())
                    / np.timedelta64(1, 'D')) < out_threshold:
                kickout_list.append(i)
            elif period_prices.loc[:, i].first_valid_index() < reset_start_date:
                reset_start_date = period_prices.loc[:, i].first_valid_index()
            elif period_volume.loc[:, i].last_valid_index() < reset_end_date or \
                    period_volume.loc[:, i].value_counts().iloc[0] >= out_threshold:
                suspended_list.append(i)
        else:
            kickout_list.append(i)
    # Check whether any ST stocks are included and generate a list for ST stocks
    st_list = list(period_prices.columns.values[
        rqdatac.is_st_stock(equity_list, reset_start_date, reset_end_date).sum(axis=0) > 0])
    # Generate the final kickout list, which includes all of the above
    # kickout_list_s = set(kickout_list)
    # st_list_s = set(st_list)
    # suspended_list_s = set(suspended_list)
    # two_list_union = st_list_s.union(suspended_list_s)
    # final_dif = two_list_union - kickout_list_s
    # final_kickout_list = kickout_list + list(final_dif)
    final_kickout_list = list(set().union(kickout_list, st_list, suspended_list))
    # Generate clean data
    equity_list_s = set(equity_list)
    final_kickout_list_s = set(final_kickout_list)
    clean_equity_list = list(equity_list_s - final_kickout_list_s)
    clean_period_prices = period_prices.loc[reset_start_date:reset_end_date, clean_equity_list]
    self.clean_period_prices = clean_period_prices
    self.clean_equity_list = list(clean_period_prices.columns.values)
    self.kickout_list = kickout_list
    self.st_list = st_list
    self.suspended_list = suspended_list
    self.reset_start_d = reset_start_date
    self.reset_end_d = reset_end_date
def get_option_price_each_day(_date, all_ids_) -> pd.Series:
    price_ = rqdatac.get_price(all_ids_, _date, _date, expect_df=True)
    if price_ is not None:
        price_ = price_['close'].reset_index(level=1, drop=True).rename('option_price')
        msg = 'Option price data missing'
        check_if_missing_items(price_.index.tolist(), all_ids_, msg)
        return price_
    else:
        return pd.Series()
def get_cumulative_range(stock_list, date, market_cap_on_current_day):
    trading_date_253_before = rqdatac.get_trading_dates(date - timedelta(days=500), date, country='cn')[-253]
    daily_return = rqdatac.get_price(stock_list, trading_date_253_before, date, frequency='1d',
                                     fields='close').fillna(method='ffill').pct_change()[1:]
    # Drop stocks whose return series contains missing values
    inds = daily_return.isnull().sum()[daily_return.isnull().sum() > 0].index
    daily_return = daily_return.drop(inds, axis=1)
    # Convert the compounded risk-free rate into a daily rate
    compounded_risk_free_return = rqdatac.get_yield_curve(start_date=trading_date_253_before,
                                                          end_date=date, tenor='3M')
    risk_free_return = (((1 + compounded_risk_free_return) ** (1 / 365)) - 1).loc[daily_return.index]
    # Each time window spans 21 trading days
    splitting_points = np.arange(0, 273, 21)
    cumulative_return = pd.DataFrame()
    for period in range(1, len(splitting_points)):
        compounded_return = ((1 + daily_return.iloc[splitting_points[0]:splitting_points[period]]).cumprod()
                             - 1).iloc[-1]
        compounded_risk_free_return = ((1 + risk_free_return.iloc[splitting_points[0]:splitting_points[period]]
                                        ).cumprod() - 1).iloc[-1]
        cumulative_return[period] = np.log(1 + compounded_return).subtract(
            np.log(1 + compounded_risk_free_return.iloc[0]))
    cumulative_return = cumulative_return.cumsum(axis=1)
    processed_cumulative_range = winsorization_and_market_cap_weighed_standardization(
        cumulative_return.T.max() - cumulative_return.T.min(), market_cap_on_current_day)
    return processed_cumulative_range
def data_process(order_book_ids, equity_type, start_date):
    windows = 132
    end_date = rqdatac.get_previous_trading_date(start_date)
    for i in range(windows + 1):
        start_date = rqdatac.get_previous_trading_date(start_date)
    if equity_type == 'funds':
        period_data = rqdatac.fund.get_nav(order_book_ids, start_date, end_date, fields='acc_net_value')
    elif equity_type == 'stocks':
        period_data = rqdatac.get_price(order_book_ids, start_date, end_date, frequency='1d',
                                        fields=['close', 'volume'])
    period_prices = period_data['close']
    period_volume = period_data['volume']
    # Set up the threshold of elimination
    out_threshold = ceil(period_prices.shape[0] / 2)
    reset_start_date = start_date  # default, in case no column pushes the start date later
    kickout_list = list()
    suspended_list = list()
    # Locate the first valid value of each column. If the available sequence length is less than the threshold,
    # add the column name to the kickout list; if the sequence length is longer than the threshold but shorter
    # than the chosen period length, reset the start_date to the later date. The latest start_date whose
    # sequence length is greater than the threshold will be chosen.
    # Check whether any stock has long suspended trading periods or has been delisted, and collect such stocks.
    for i in order_book_ids:
        if not period_volume.loc[:, i].value_counts().empty:
            if ((end_date - period_prices.loc[:, i].first_valid_index())
                    / np.timedelta64(1, 'D')) < out_threshold:
                kickout_list.append(i)
            elif period_prices.loc[:, i].first_valid_index() < start_date:
                reset_start_date = period_prices.loc[:, i].first_valid_index()
            elif period_volume.loc[:, i].last_valid_index() < end_date or \
                    period_volume.loc[:, i].value_counts().iloc[0] >= out_threshold:
                suspended_list.append(i)
        else:
            kickout_list.append(i)
    # Check whether any ST stocks are included and generate a list for ST stocks
    st_list = list(period_prices.columns.values[
        rqdatac.is_st_stock(order_book_ids, reset_start_date, end_date).sum(axis=0) > 0])
    # Generate the final kickout list, which includes all of the above
    final_kickout_list = list(set().union(kickout_list, st_list, suspended_list))
    # Generate clean data
    order_book_ids_s = set(order_book_ids)
    final_kickout_list_s = set(final_kickout_list)
    clean_order_book_ids = list(order_book_ids_s - final_kickout_list_s)
    clean_period_prices = period_prices.loc[reset_start_date:end_date, clean_order_book_ids]
    return clean_period_prices, final_kickout_list
def get_daily_excess_return(stock_list, start_date, end_date):
    # Pull the price series; for delisted stocks, forward-fill the price (daily return of 0)
    stock_daily_return = rqdatac.get_price(stock_list, rqdatac.get_previous_trading_date(start_date),
                                           end_date, frequency='1d',
                                           fields='close').fillna(method='ffill').pct_change()[1:]
    # Drop stocks whose return series contains missing values
    inds = stock_daily_return.isnull().sum()[stock_daily_return.isnull().sum() > 0].index
    filtered_stock_daily_return = stock_daily_return.drop(inds, axis=1)
    # Testing shows that the CSI All Share Index (000985) works best as the market portfolio
    market_portfolio_daily_return = rqdatac.get_price('000985.XSHG',
                                                      rqdatac.get_previous_trading_date(start_date),
                                                      end_date, frequency='1d',
                                                      fields='close').pct_change()[1:]
    # Compute the daily risk-free rate
    compounded_risk_free_return = rqdatac.get_yield_curve(start_date=start_date, end_date=end_date, tenor='3M')
    risk_free_return = (((1 + compounded_risk_free_return) ** (1 / 365)) - 1).loc[filtered_stock_daily_return.index]
    daily_excess_return = filtered_stock_daily_return.T.subtract(risk_free_return.iloc[:, 0]).T
    market_portfolio_daily_excess_return = market_portfolio_daily_return.subtract(risk_free_return.iloc[:, 0])
    return daily_excess_return, market_portfolio_daily_excess_return
def get_tick(contract_id, start_date, end_date):
    """
    Download tick data.
    :param contract_id:
    :param start_date:
    :param end_date:
    :return:
    """
    df_tick = rq.get_price(contract_id, start_date=start_date, end_date=end_date,
                           frequency='tick', fields=None)
    return df_tick.between_time('09:25:00', '15:10:00')
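# Minimal usage sketch for get_tick (assumptions: the RQData SDK has been initialized
# elsewhere; the contract id and dates are illustrative). The result is already trimmed
# to 09:25:00 - 15:10:00 by the function itself.
if __name__ == '__main__':
    ticks = get_tick('RB1910', start_date='2019-04-15', end_date='2019-04-15')
    print(ticks.head())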
def get_stock_beta(stock_list, stock_excess_return, benchmark, latest_trading_date, market_cap_on_current_day):
    trading_date_253_before = rqdatac.get_trading_dates(latest_trading_date - timedelta(days=500),
                                                        latest_trading_date, country='cn')[-253]
    exp_weight = get_exponential_weight(half_life=63, length=252)
    weighted_stock_excess_return = stock_excess_return.T.multiply(exp_weight).T
    compounded_risk_free_return = rqdatac.get_yield_curve(start_date=trading_date_253_before,
                                                          end_date=latest_trading_date, tenor='3M')
    risk_free_return = (((1 + compounded_risk_free_return) ** (1 / 365)) - 1).loc[stock_excess_return.index]
    market_portfolio_daily_return = rqdatac.get_price(benchmark, trading_date_253_before, latest_trading_date,
                                                      frequency='1d',
                                                      fields='close').fillna(method='ffill').pct_change()[1:]
    market_portfolio_excess_return = market_portfolio_daily_return.subtract(risk_free_return.iloc[:, 0])
    weighted_market_portfolio_excess_return = market_portfolio_excess_return.multiply(exp_weight).T
    weighted_market_portfolio_variance = weighted_market_portfolio_excess_return.var()
    beta = [weighted_market_portfolio_excess_return.cov(weighted_stock_excess_return[stock])
            / weighted_market_portfolio_variance
            for stock in stock_excess_return.columns]
    stock_beta = pd.Series(beta, index=stock_excess_return.columns)
    # Impute missing beta values with the regression approach
    imputed_stock_beta = individual_factor_imputation(stock_list, stock_beta, market_cap_on_current_day,
                                                      latest_trading_date.strftime('%Y-%m-%d'))
    return imputed_stock_beta
def future_data_load(args, start_date=None, end_date=None):
    """
    Futures minute-bar data.
    :param args: list of contract ids or a single contract id string
    :param start_date: start date
    :param end_date: end date
    :return:
    """
    if isinstance(args, str):
        contract_id = args
        data_dict = {contract_id: pd.DataFrame()}
        try:
            contract_data = rq.get_price(contract_id, start_date=start_date, end_date=end_date,
                                         frequency='1m', fields=None)
        except AttributeError:
            print(contract_id)
            raise
        data_dict[contract_id] = contract_data
    else:
        data_dict = dict()
        for contract_id in args:
            try:
                contract_data = rq.get_price(contract_id, start_date=start_date, end_date=end_date,
                                             frequency='1m', fields=None)
            except AttributeError:
                print(contract_id)
                raise
            data_dict[contract_id] = contract_data
    return data_dict
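# Minimal usage sketch for future_data_load (assumptions: the RQData SDK has been initialized
# elsewhere; the contract ids and date range are illustrative). Passing a list returns a dict
# keyed by contract id; passing a single string returns a one-entry dict.
if __name__ == '__main__':
    bars = future_data_load(['RB1910', 'CU1906'], start_date='2019-04-01', end_date='2019-04-16')
    for contract_id, frame in bars.items():
        print(contract_id, len(frame))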
def update_future():
    SQL = create_engine('postgresql://*****:*****@#[email protected]:5432/future')
    all_instruments = rq.all_instruments(type="Future")
    all_instruments.to_sql("contract", SQL, if_exists='replace')
    futures = ["CU", "M", "SR"]
    for i in futures:
        data = all_instruments[all_instruments.underlying_symbol == i]
        data = data[data.symbol.str.contains("主力连续")]
        ids = data.order_book_id
        for id in ids:
            table = "daily_%s" % id
            if is_table_exist(table, SQL):
                start_date_string = get_started_date(table, SQL)
                if start_date_string:
                    df = rq.get_price(id, start_date=start_date_string, end_date=TODAY_STR, frequency='1d')
                    write(df, table, SQL, if_exists='append')
                    print(u"已更新期货合约:%s 主力连续日线数据, 开始时间: %s, 结束时间: %s" % (id, start_date_string, TODAY_STR))
            else:
                df = rq.get_price(id, start_date=START_DATE_STR, end_date=TODAY_STR, frequency='1d')
                write(df, table, SQL, if_exists='replace')
                print(u"已更新期货合约:%s 主力连续日线数据, 开始时间: %s, 结束时间: %s" % (id, START_DATE_STR, TODAY_STR))
    return
def update_stock():
    SQL = create_engine('postgresql://*****:*****@#[email protected]:5432/stock')
    all_instruments = rq.all_instruments(type="CS")
    all_instruments.to_sql("contract", SQL, if_exists='replace')
    contracts_df = pd.read_sql_query('select * from "contract"', con=SQL)
    ids = contracts_df["order_book_id"]
    for id in ids:
        table = "daily_%s" % id
        if is_table_exist(table, SQL):
            start_date_string = get_started_date(table, SQL)
            if start_date_string:
                df = rq.get_price(id, start_date=start_date_string, end_date=TODAY_STR, frequency='1d')
                write(df, table, SQL, if_exists='append')
                print(u"已更新股票:%s 日线行情,开始时间: %s, 结束时间: %s" % (id, start_date_string, TODAY_STR))
        else:
            df = rq.get_price(id, start_date=START_DATE_STR, end_date=TODAY_STR, frequency='1d')
            write(df, table, SQL, if_exists='replace')
            print(u"已更新股票:%s 日线行情,开始时间: %s, 结束时间: %s" % (id, START_DATE_STR, TODAY_STR))
    return
def get_price_and_save_to_csv(a, temp):
    temp_path = DATA_PATH + temp
    for name, group in a:
        future_name = name
        start_date = group.index[0].strftime('%Y%m%d')
        end_date = group.index[-1].strftime('%Y%m%d')
        print(future_name)
        print(start_date)
        print(end_date)
        test_data = rq.get_price(future_name, start_date=start_date, end_date=end_date, frequency='1m')
        test_data = pd.DataFrame(test_data)
        save_file_name = future_name + ".csv"
        test_data.to_csv(temp_path + "\\" + save_file_name)
def get_explicit_factor_returns(date):
    """
    :param date: date
    :return: pandas.Series
    """
    previous_trading_date = rqdatac.get_previous_trading_date(date)
    all_a_stocks = rqdatac.all_instruments(type="CS", date=previous_trading_date).order_book_id.tolist()
    filtered_stocks = noisy_stocks_filter(all_a_stocks, previous_trading_date)
    # print(all_a_stocks, previous_trading_date)
    factor_exposures = rqdatac.get_style_factor_exposure(all_a_stocks, previous_trading_date,
                                                         previous_trading_date, "all").sort_index()
    factor_exposures.index = factor_exposures.index.droplevel(1)
    closePrice = rqdatac.get_price(all_a_stocks, rqdatac.get_previous_trading_date(previous_trading_date),
                                   previous_trading_date, fields="close")
    priceChange = closePrice.pct_change().iloc[-1]
    index_mapping = {"csi_300": '000300.XSHG', "csi_500": "000905.XSHG", "csi_800": "000906.XSHG"}
    all_stocks = {index: rqdatac.index_components(index_mapping.get(index), date=previous_trading_date)
                  for index in index_mapping}
    all_stocks['whole_market'] = filtered_stocks

    def _calc_explicitReturns_with_stocksList(stocksList):
        # Compute factor returns for the given stock pool
        _sizeBeta = factor_exposures[['size', 'beta']].loc[stocksList]
        _quantileGroup = _sizeBeta.apply(lambda x: pd.cut(x, bins=3, labels=False) + 1).reset_index()
        _quantileStocks = _quantileGroup.groupby(['size', 'beta']).apply(lambda x: x.index.tolist())
        market_neutralize_stocks = _quantileStocks.apply(
            lambda x: pd.Series(stocksList).loc[x].values.tolist()).values.tolist()
        return factor_exposures.loc[stocksList].apply(
            lambda x, y=market_neutralize_stocks: _calc_single_explicit_returns(x, y))

    def _calc_single_explicit_returns(_factor_exposure, market_neutralize_stocks):
        # Compute the return of a single factor
        def _deuce(series):
            median = series.median()
            return [series[series <= median].index.tolist(), series[series > median].index.tolist()]

        deuceResults = np.array([_deuce(_factor_exposure[neutralized_stks])
                                 for neutralized_stks in market_neutralize_stocks]).flatten()
        short_stocksList = list(reduce(lambda x, y: set(x) | set(y),
                                       np.array([s for i, s in enumerate(deuceResults) if i % 2 == 0])))
        long_stockList = list(reduce(lambda x, y: set(x) | set(y),
                                     np.array([s for i, s in enumerate(deuceResults) if i % 2 == 1])))
        return priceChange[long_stockList].mean() - priceChange[short_stocksList].mean()

    results = {key: _calc_explicitReturns_with_stocksList(all_stocks.get(key)) for key in all_stocks}
    return pd.DataFrame(results)[['whole_market', 'csi_300', 'csi_500', 'csi_800']]
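# Minimal usage sketch for get_explicit_factor_returns (assumptions: rqdatac has been
# initialized and noisy_stocks_filter is available in this module; the date is illustrative).
if __name__ == '__main__':
    explicit_returns = get_explicit_factor_returns("2019-04-16")
    print(explicit_returns.head())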
def downloadDailyBarBySymbol(symbol):
    """Download daily bar data for a given contract."""
    start = time()
    cl = db2[symbol]
    cl.ensure_index([('datetime', ASCENDING)], unique=True)  # add index
    df = rq.get_price(symbol, frequency='1d', fields=FIELDS, end_date=datetime.now().strftime('%Y%m%d'))
    for ix, row in df.iterrows():
        bar = generateVtBar(row, symbol)
        d = bar.__dict__
        flt = {'datetime': bar.datetime}
        cl.replace_one(flt, d, True)
    end = time()
    cost = (end - start) * 1000
    print(u'合约%s数据下载完成%s - %s,耗时%s毫秒' % (symbol, df.index[0], df.index[-1], cost))
def downloadMinuteBarBySymbol(symbol):
    """Download minute-bar data for a given contract."""
    start = time()
    cl = dbMinute[symbol]
    cl.ensure_index([('datetime', ASCENDING)], unique=True)  # add index
    df = rq.get_price(symbol, frequency='1m', fields=FIELDS)
    for ix, row in df.iterrows():
        bar = generateVtBar(row, symbol)
        d = bar.__dict__
        flt = {'datetime': bar.datetime}
        cl.replace_one(flt, d, True)
    end = time()
    cost = (end - start) * 1000
    print(u'合约%s的分钟K线数据下载完成%s - %s,耗时%s毫秒' % (symbol, df.index[0], df.index[-1], cost))
def download_minute_bar(vt_symbol):
    """Download minute-bar data for a given contract."""
    print(f"开始下载合约数据{vt_symbol}")
    symbol, exchange = vt_symbol.split(".")
    start = time()
    df = rq.get_price(symbol, frequency="1m", fields=FIELDS)
    with DB.atomic():
        for ix, row in df.iterrows():
            print(row.name)
            bar = generate_bar_from_row(row, symbol, exchange)
            DbBarData.replace(bar.__data__).execute()
    end = time()
    cost = (end - start) * 1000
    print("合约%s的分钟K线数据下载完成%s - %s,耗时%s毫秒" % (symbol, df.index[0], df.index[-1], cost))
def downloadTickBySymbol(symbol, date):
    """Download tick data for a given contract on a given date."""
    start = time()
    cl = dbTick[symbol]
    cl.ensure_index([('datetime', ASCENDING)], unique=True)  # add index
    df = rq.get_price(symbol, frequency='tick', start_date=date, end_date=date)
    for ix, row in df.iterrows():
        tick = generateVtTick(row, symbol)
        d = tick.__dict__
        flt = {'datetime': tick.datetime}
        cl.replace_one(flt, d, True)
    end = time()
    cost = (end - start) * 1000
    print(u'合约%sTick数据下载完成%s - %s,耗时%s毫秒' % (symbol, df.index[0], df.index[-1], cost))