def _database_query_dict(code, start, end, frequence=''):
    """Build the filter dict used to query bars for a list of codes.

    :param code: list of stock/future/index codes, matched with ``$in``
    :param start: start date (or datetime) string
    :param end: end date (or datetime) string
    :param frequence: minute frequence; the default empty string selects
        the date based filter
    :return: a filter on ``time_stamp`` and ``type`` when ``frequence`` is
        truthy, otherwise a filter on ``date_stamp``
    """
    code_filter = {'$in': code}

    # No frequence given: filter on the date stamp.
    if not frequence:
        return {
            'code': code_filter,
            "date_stamp": {
                "$lte": QA_util_date_stamp(end),
                "$gte": QA_util_date_stamp(start)
            }
        }

    # A frequence was given: filter on the time stamp and the bar type.
    return {
        'code': code_filter,
        "time_stamp": {
            "$gte": QA_util_time_stamp(start),
            "$lte": QA_util_time_stamp(end)
        },
        'type': frequence
    }
def QA_fetch_stock_min(code, start, end, format='numpy', frequence='1min',
                       collections=DATABASE.stock_min):
    """Fetch stock minute bars from ``collections`` in the requested format.

    Arguments:
        code -- a code or list of codes; normalised by ``QA_util_code_tolist``
        start, end -- bounds converted with ``QA_util_time_stamp`` and matched
            inclusively against the ``time_stamp`` field
        format -- 'P'/'p'/'pandas'/'pd', 'json'/'dict', 'n'/'N'/'numpy' or
            'list'/'l'/'L'
        frequence -- '1min', '5min', '15min', '30min', '60min' or the short
            forms '1m' ... '60m'; any other value is reported with ``print``
            and then used unchanged in the query
        collections -- collection to query (default ``DATABASE.stock_min``)

    Returns:
        The bars in the requested format. If post-processing the query result
        raises, the data becomes ``None`` and ``None`` is what gets formatted.
        An unrecognised ``format`` prints an error and returns ``None``.
    """
    frequence_names = (('1min', '1m'), ('5min', '5m'), ('15min', '15m'),
                       ('30min', '30m'), ('60min', '60m'))
    for full_name, short_name in frequence_names:
        if frequence in (full_name, short_name):
            frequence = full_name
            break
    else:
        print(
            "💢 Error QA_fetch_stock_min parameter frequence=%s is none of 1min 1m 5min 5m 15min 15m 30min 30m 60min 60m"
            % frequence)

    # code checking
    code = QA_util_code_tolist(code)
    query = {
        'code': {'$in': code},
        "time_stamp": {
            "$gte": QA_util_time_stamp(start),
            "$lte": QA_util_time_stamp(end)
        },
        'type': frequence
    }
    frame = pd.DataFrame(list(collections.find(query)))

    try:
        # Expose `vol` as `volume`, keep bars with volume > 1, parse the
        # datetime column, de-duplicate and index by datetime.
        res = (
            frame.drop('_id', axis=1)
            .assign(volume=frame.vol)
            .query('volume>1')
            .assign(datetime=pd.to_datetime(frame.datetime))
            .drop_duplicates(['datetime', 'code'])
            .set_index('datetime', drop=False)
        )
    except:
        # Any failure above (for example an empty result without the
        # expected columns) degrades to "no data".
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    if format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    # other output formats
    if format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    if format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()

    print(
        "💢 Error QA_fetch_stock_min format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
        % format)
    return None
def QA_fetch_stock_min(code, startTime, endTime, format_='numpy', type_='1min',
                       collections=QA_Setting.client.quantaxis.stock_min):
    """Fetch minute bars of a single stock code.

    Arguments:
        code -- stock code; converted with ``str`` for the query
        startTime, endTime -- bounds converted with ``QA_util_time_stamp`` and
            matched inclusively against the ``time_stamp`` field
        format_ -- 'numpy'/'np'/'n', 'list'/'l'/'L' or 'P'/'p'/'pandas'/'pd'
        type_ -- '1min', '5min', '15min', '30min', '60min' or the short forms
            '1m' ... '60m'; other values are used unchanged in the query
        collections -- collection to query

    Returns:
        The bars as a numpy array, a nested list or a DataFrame indexed by
        ``datetime``; ``None`` when ``format_`` is not recognised.
    """
    type_names = (('1min', '1m'), ('5min', '5m'), ('15min', '15m'),
                  ('30min', '30m'), ('60min', '60m'))
    for full_name, short_name in type_names:
        if type_ in (full_name, short_name):
            type_ = full_name
            break

    cursor = collections.find({
        'code': str(code),
        "time_stamp": {
            "$gte": QA_util_time_stamp(startTime),
            "$lte": QA_util_time_stamp(endTime)
        },
        'type': type_
    })
    # One row per document; the stored `vol` field becomes the `volume` column.
    rows = [
        [
            str(item['code']),
            float(item['open']),
            float(item['high']),
            float(item['low']),
            float(item['close']),
            float(item['vol']),
            item['datetime'],
            item['time_stamp'],
            item['date'],
        ]
        for item in cursor
    ]
    frame = DataFrame(rows, columns=[
        'code', 'open', 'high', 'low', 'close', 'volume', 'datetime',
        'time_stamp', 'date'
    ])
    frame['datetime'] = pd.to_datetime(frame['datetime'])
    frame = frame.set_index('datetime', drop=False)

    if format_ in ['numpy', 'np', 'n']:
        return numpy.asarray(frame)
    if format_ in ['list', 'l', 'L']:
        return numpy.asarray(frame).tolist()
    if format_ in ['P', 'p', 'pandas', 'pd']:
        return frame
def QA_fetch_stock_min(code, startTime, endTime, type_='numpy',
                       collections=QA_Setting.client.quantaxis.stock_min_five):
    """Fetch minute bars of one stock and pass them through ``QA_fetch_stock_to_fq``.

    The original one-line docstring describes the result as forward-adjusted
    minute bars; the adjustment itself happens in ``QA_fetch_stock_to_fq``.

    Arguments:
        code -- stock code; converted with ``str`` for the query
        startTime, endTime -- bounds converted with ``QA_util_time_stamp`` and
            matched inclusively against the ``time_stamp`` field
        type_ -- output format: 'numpy'/'np'/'n', 'list'/'l'/'L' or
            'P'/'p'/'pandas'/'pd'
        collections -- collection to query

    Returns:
        The result of ``QA_fetch_stock_to_fq`` as a numpy array, a nested
        list, or unchanged; ``None`` when ``type_`` is not recognised.
    """
    cursor = collections.find({
        'code': str(code),
        "time_stamp": {
            "$gte": QA_util_time_stamp(startTime),
            "$lte": QA_util_time_stamp(endTime)
        }
    })
    rows = [
        [
            str(item['code']),
            float(item['open']),
            float(item['high']),
            float(item['low']),
            float(item['close']),
            float(item['volume']),
            item['datetime'],
            item['time_stamp'],
            item['date'],
        ]
        for item in cursor
    ]
    frame = DataFrame(rows, columns=[
        'code', 'open', 'high', 'low', 'close', 'volume', 'datetime',
        'time_stamp', 'date'
    ])
    frame['datetime'] = pd.to_datetime(frame['datetime'])
    frame = frame.set_index('datetime', drop=False)

    res = QA_fetch_stock_to_fq(frame)

    if type_ in ['numpy', 'np', 'n']:
        return numpy.asarray(res)
    if type_ in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    if type_ in ['P', 'p', 'pandas', 'pd']:
        return res
def QA_fetch_index_min_adv(code, start, end=None, frequence='1min',
                           if_drop_index=False,
                           collections=DATABASE.index_min):
    """Fetch index minute bars and wrap them in ``QA_DataStruct_Index_min``.

    Arguments:
        code -- a single code (``str``) or a ``list`` of codes
        start -- start date/datetime string; a 10 character value gets
            ' 09:30:00' appended
        end -- end date/datetime string; defaults to the original ``start``,
            and a 10 character value gets ' 15:00:00' appended
        frequence -- '1min', '5min', '15min', '30min', '60min' or the short
            forms '1m' ... '60m'; other values are used unchanged in the query
        if_drop_index -- passed as ``drop`` to ``set_index`` on
            ``['datetime', 'code']``
        collections -- collection to query (default ``DATABASE.index_min``)

    Returns:
        A ``QA_DataStruct_Index_min``; ``None`` when ``code`` is neither a
        ``str`` nor a ``list``.
    """
    frequence_names = (('1min', '1m'), ('5min', '5m'), ('15min', '15m'),
                       ('30min', '30m'), ('60min', '60m'))
    for full_name, short_name in frequence_names:
        if frequence in (full_name, short_name):
            frequence = full_name
            break

    # `end` falls back to the *unextended* start so a bare date covers the
    # whole 09:30:00 - 15:00:00 range of that day.
    end = start if end is None else end
    if len(start) == 10:
        start = '{} 09:30:00'.format(start)
    if len(end) == 10:
        end = '{} 15:00:00'.format(end)

    if isinstance(code, str):
        cursor = collections.find({
            'code': str(code),
            "time_stamp": {
                "$gte": QA_util_time_stamp(start),
                "$lte": QA_util_time_stamp(end)
            },
            'type': frequence
        })
        rows = [
            [
                str(item['code']),
                float(item['open']),
                float(item['high']),
                float(item['low']),
                float(item['close']),
                float(item['vol']),
                item['datetime'],
                item['time_stamp'],
                item['date'],
            ]
            for item in cursor
        ]
        frame = DataFrame(rows, columns=[
            'code', 'open', 'high', 'low', 'close', 'volume', 'datetime',
            'time_stamp', 'date'
        ])
        frame['datetime'] = pd.to_datetime(frame['datetime'])
        return QA_DataStruct_Index_min(
            frame.query('volume>1').set_index(['datetime', 'code'],
                                              drop=if_drop_index))
    elif isinstance(code, list):
        # Forward `collections` so a caller-supplied collection is honoured
        # for every code (previously the recursion fell back to the default).
        # NOTE(review): with if_drop_index=True the per-code frames may no
        # longer carry 'datetime'/'code' columns for the set_index below --
        # depends on what QA_DataStruct_Index_min.data returns; confirm.
        return QA_DataStruct_Index_min(
            pd.concat([
                QA_fetch_index_min_adv(code_, start, end, frequence,
                                       if_drop_index, collections).data
                for code_ in code
            ]).set_index(['datetime', 'code'], drop=if_drop_index))
def QA_fetch_ctp_tick(code, start, end, frequence, format='pd',
                      collections=DATABASE.ctp_tick):
    """Fetch stored CTP ticks as a DataFrame indexed by datetime.

    Arguments:
        code -- instrument id or list of ids; normalised with
            ``QA_util_code_tolist(code, auto_fill=False)`` and matched against
            the ``InstrumentID`` field
        start, end -- bounds converted with ``QA_util_time_stamp`` and matched
            inclusively against the ``time_stamp`` field
        frequence -- value matched against the ``type`` field

    Keyword Arguments:
        format {str} -- not used by this function (default: {'pd'})
        collections -- collection to query (default: {DATABASE.ctp_tick})

    Returns:
        DataFrame with the selected tick columns plus ``datetime`` and
        ``code``, indexed by ``datetime``.
    """
    code = QA_util_code_tolist(code, auto_fill=False)
    query = {
        'InstrumentID': {'$in': code},
        "time_stamp": {
            "$gte": QA_util_time_stamp(start),
            "$lte": QA_util_time_stamp(end)
        },
        'type': frequence
    }
    cursor = collections.find(query, {"_id": 0}, batch_size=10000)

    # 1.7976931348623157e+308 is the largest finite double; it and empty
    # strings are turned into NaN, then every column containing a NaN is
    # dropped.
    raw = pd.DataFrame(list(cursor))
    raw = raw.replace(1.7976931348623157e+308, numpy.nan)
    raw = raw.replace('', numpy.nan)
    hq = raw.dropna(axis=1)

    wanted_columns = [
        'ActionDay', 'AskPrice1', 'AskVolume1', 'AveragePrice', 'BidPrice1',
        'BidVolume1', 'HighestPrice', 'InstrumentID', 'LastPrice',
        'OpenInterest', 'TradingDay', 'UpdateMillisec', 'UpdateTime', 'Volume'
    ]
    ticks = hq.loc[:, wanted_columns]

    # Assemble "<date> <time>.<fraction>" text and parse it.
    # NOTE(review): UpdateMillisec is divided by 1000000, so a value of 500
    # becomes ".000500"; if the field really holds milliseconds this looks
    # 1000x too small -- confirm before changing.
    day_text = ticks.ActionDay.apply(QA_util_date_int2str)
    fraction_text = (ticks.UpdateMillisec / 1000000).apply(
        lambda x: ('%.6f' % x)[1:])
    datetime_text = day_text + ' ' + ticks.UpdateTime + fraction_text

    ticks = ticks.assign(datetime=datetime_text, code=ticks.InstrumentID)
    ticks['datetime'] = pd.to_datetime(ticks['datetime'])
    return ticks.set_index(ticks['datetime'])
def QA_fetch_stock_min_adv(code, start, end, type_='1min', if_drop_index=False,
                           collections=QA_Setting.client.quantaxis.stock_min):
    """Fetch stock minute bars and wrap them in ``QA_DataStruct_Stock_min``.

    Arguments:
        code -- a single code (``str``) or a ``list`` of codes
        start, end -- bounds converted with ``QA_util_time_stamp`` and matched
            inclusively against the ``time_stamp`` field
        type_ -- '1min', '5min', '15min', '30min', '60min' or the short forms
            '1m' ... '60m'; other values are used unchanged in the query
        if_drop_index -- passed as ``drop`` to ``set_index`` on
            ``['datetime', 'code']``
        collections -- collection to query

    Returns:
        A ``QA_DataStruct_Stock_min``; ``None`` when ``code`` is neither a
        ``str`` nor a ``list``.
    """
    type_names = (('1min', '1m'), ('5min', '5m'), ('15min', '15m'),
                  ('30min', '30m'), ('60min', '60m'))
    for full_name, short_name in type_names:
        if type_ in (full_name, short_name):
            type_ = full_name
            break

    if isinstance(code, str):
        cursor = collections.find({
            'code': str(code),
            "time_stamp": {
                "$gte": QA_util_time_stamp(start),
                "$lte": QA_util_time_stamp(end)
            },
            'type': type_
        })
        rows = [
            [
                str(item['code']),
                float(item['open']),
                float(item['high']),
                float(item['low']),
                float(item['close']),
                float(item['vol']),
                item['datetime'],
                item['time_stamp'],
                item['date'],
            ]
            for item in cursor
        ]
        frame = DataFrame(rows, columns=[
            'code', 'open', 'high', 'low', 'close', 'volume', 'datetime',
            'time_stamp', 'date'
        ])
        frame['datetime'] = pd.to_datetime(frame['datetime'])
        return QA_DataStruct_Stock_min(
            frame.query('volume>1').set_index(['datetime', 'code'],
                                              drop=if_drop_index))
    elif isinstance(code, list):
        # Support for a list of codes: fetch each code and concatenate.
        # Forward `collections` so a caller-supplied collection is honoured
        # for every code (previously the recursion fell back to the default).
        # NOTE(review): with if_drop_index=True the per-code frames may no
        # longer carry 'datetime'/'code' columns for the set_index below --
        # depends on what QA_DataStruct_Stock_min.data returns; confirm.
        return QA_DataStruct_Stock_min(
            pd.concat([
                QA_fetch_stock_min_adv(code_, start, end, type_,
                                       if_drop_index, collections).data
                for code_ in code
            ]).set_index(['datetime', 'code'], drop=if_drop_index))
def QA_fetch_index_min(code, start, end, format='numpy', frequence='1min',
                       collections=DATABASE.index_min):
    """Fetch index minute bars from ``collections`` in the requested format.

    Arguments:
        code -- a code or list of codes; normalised by ``QA_util_code_tolist``
        start, end -- bounds converted with ``QA_util_time_stamp`` and matched
            inclusively against the ``time_stamp`` field
        format -- 'dict'/'json' (raw documents), 'numpy'/'np'/'n',
            'list'/'l'/'L' or 'P'/'p'/'pandas'/'pd'
        frequence -- '1min', '5min', '15min', '30min', '60min' or the short
            forms '1m' ... '60m'; other values are used unchanged in the query
        collections -- collection to query (default ``DATABASE.index_min``)

    Returns:
        For 'dict'/'json' the list of documents as returned by the query;
        otherwise the bars as numpy array, nested list or DataFrame indexed
        by ``datetime``. ``None`` when ``format`` is not recognised.
    """
    frequence_names = (('1min', '1m'), ('5min', '5m'), ('15min', '15m'),
                       ('30min', '30m'), ('60min', '60m'))
    for full_name, short_name in frequence_names:
        if frequence in (full_name, short_name):
            frequence = full_name
            break

    code = QA_util_code_tolist(code)
    cursor = collections.find({
        'code': {'$in': code},
        "time_stamp": {
            "$gte": QA_util_time_stamp(start),
            "$lte": QA_util_time_stamp(end)
        },
        'type': frequence
    })

    # Raw documents are returned untouched for dict/json.
    if format in ['dict', 'json']:
        return list(cursor)

    rows = [
        [
            str(item['code']),
            float(item['open']),
            float(item['high']),
            float(item['low']),
            float(item['close']),
            float(item['vol']),
            item['datetime'],
            item['time_stamp'],
            item['date'],
        ]
        for item in cursor
    ]
    frame = DataFrame(rows, columns=[
        'code', 'open', 'high', 'low', 'close', 'volume', 'datetime',
        'time_stamp', 'date'
    ])
    frame['datetime'] = pd.to_datetime(frame['datetime'])
    frame = frame.set_index('datetime', drop=False)

    if format in ['numpy', 'np', 'n']:
        return numpy.asarray(frame)
    if format in ['list', 'l', 'L']:
        return numpy.asarray(frame).tolist()
    if format in ['P', 'p', 'pandas', 'pd']:
        return frame
def QA_fetch_stock_min(code, start, end, format='numpy', frequence='1min',
                       collections=DATABASE.stock_min):
    """Fetch stock minute bars from ``collections`` in the requested format.

    Arguments:
        code -- a ``str`` or a ``list``; each code is converted with ``str``
            and cut to its first 6 characters. Other types are passed to the
            query unchanged.
        start, end -- bounds converted with ``QA_util_time_stamp`` and matched
            inclusively against the ``time_stamp`` field
        format -- 'P'/'p'/'pandas'/'pd', 'json'/'dict', 'n'/'N'/'numpy' or
            'list'/'l'/'L'
        frequence -- '1min', '5min', '15min', '30min', '60min' or the short
            forms '1m' ... '60m'; other values are used unchanged in the query
        collections -- collection to query (default ``DATABASE.stock_min``)

    Returns:
        The bars in the requested format. If post-processing the query result
        raises, the data becomes ``None`` and ``None`` is what gets formatted.
        An unrecognised ``format`` returns ``None``.
    """
    frequence_names = (('1min', '1m'), ('5min', '5m'), ('15min', '15m'),
                       ('30min', '30m'), ('60min', '60m'))
    for full_name, short_name in frequence_names:
        if frequence in (full_name, short_name):
            frequence = full_name
            break

    # code checking
    if isinstance(code, str):
        code = [str(code)[0:6]]
    elif isinstance(code, list):
        code = [str(entry)[0:6] for entry in code]

    query = {
        'code': {'$in': code},
        "time_stamp": {
            "$gte": QA_util_time_stamp(start),
            "$lte": QA_util_time_stamp(end)
        },
        'type': frequence
    }
    frame = pd.DataFrame(list(collections.find(query)))

    try:
        res = (
            frame.drop('_id', axis=1)
            .assign(volume=frame.vol)
            .assign(datetime=pd.to_datetime(frame.datetime))
            .drop_duplicates(['datetime', 'code'])
            .set_index('datetime', drop=False)
        )
    except:
        # Any failure above (for example an empty result without the
        # expected columns) degrades to "no data".
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    if format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    # other output formats
    if format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    if format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    return None
def QA_fetch_stock_min(code, start, end, frequence='1min', format='numpy',
                       collections=DATABASE.stock_min):
    """Fetch stock minute bars from ``collections`` in the requested format.

    Arguments:
        code -- a code or list of codes; normalised by ``QA_util_code_tolist``
        start, end -- date/datetime values; both must pass
            ``QA_util_dateordatetime_valid``. They are converted with
            ``QA_util_time_stamp`` and matched inclusively against the
            ``time_stamp`` field.
        frequence -- '1min', '5min', '15min', '30min', '60min' or the short
            forms '1m' ... '60m'; any other value is logged as an error and
            then used unchanged in the query
        format -- output format handed to ``QA_util_to_anyformat_from_pandas``
        collections -- collection to query (default ``DATABASE.stock_min``)

    Returns:
        The result of ``QA_util_to_anyformat_from_pandas``, or ``None`` (after
        logging) when ``start``/``end`` are not valid.
    """
    if not (QA_util_dateordatetime_valid(start)
            and QA_util_dateordatetime_valid(end)):
        QA_util_log_info(
            'QA Error QA_fetch_stock_min data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    # --- data retrieval ---
    frequence_names = (('1min', '1m'), ('5min', '5m'), ('15min', '15m'),
                       ('30min', '30m'), ('60min', '60m'))
    for full_name, short_name in frequence_names:
        if frequence in (full_name, short_name):
            frequence = full_name
            break
    else:
        QA_util_log_info(
            "QA Error QA_fetch_stock_min parameter frequence=%s is none of 1min 1m 5min 5m 15min 15m 30min 30m 60min 60m"
            % frequence)

    # code checking
    code = QA_util_code_tolist(code)
    cursor = collections.find(
        {
            'code': {'$in': code},
            "time_stamp": {
                "$gte": QA_util_time_stamp(start),
                "$lte": QA_util_time_stamp(end)
            },
            'type': frequence
        },
        {"_id": 0},
        batch_size=10000)
    res = pd.DataFrame(list(cursor))

    # --- data processing: sanity filtering, index and column selection are
    # delegated to the shared filter; the format is not changed here ---
    res = __QA_fetch_query_filter(
        res, DATA_QUERY_INDEX_COLUMNS_UNIQUE.STOCK_MIN, query='volume>1')

    # --- output formatting --- (the stray debug `print(res)` that used to
    # dump the whole frame on every call has been removed)
    return QA_util_to_anyformat_from_pandas(data=res, format=format)
def QA_fetch_index_transaction(code, start, end, format='numpy',
                               frequence='tick',
                               collections=DATABASE.index_transaction):
    """Fetch index transaction (tick) records in the requested format.

    Arguments:
        code -- a code or list of codes; normalised by ``QA_util_code_tolist``
        start, end -- bounds converted with ``QA_util_time_stamp`` and matched
            inclusively against the ``time_stamp`` field
        format -- 'P'/'p'/'pandas'/'pd', 'json'/'dict', 'n'/'N'/'numpy' or
            'list'/'l'/'L'
        frequence -- 'tick', 'TICK' or 'transaction' (all mapped to 'tick');
            any other value is reported with ``print`` and then used
            unchanged in the query
        collections -- collection to query
            (default ``DATABASE.index_transaction``)

    Returns:
        The records in the requested format. If post-processing the query
        result raises, the data becomes ``None`` and ``None`` is what gets
        formatted. An unrecognised ``format`` prints an error and returns
        ``None``.
    """
    if frequence not in ['tick', 'TICK', 'transaction']:
        print(
            "QA Error QA_fetch_index_transaction parameter frequence=%s is none of tick Tick transaction"
            % frequence)
    else:
        frequence = 'tick'

    # code checking
    code = QA_util_code_tolist(code)
    query = {
        'code': {'$in': code},
        "time_stamp": {
            "$gte": QA_util_time_stamp(start),
            "$lte": QA_util_time_stamp(end)
        },
        'type': frequence
    }
    cursor = collections.find(query, {"_id": 0}, batch_size=10000)
    frame = pd.DataFrame(list(cursor))

    try:
        res = (
            frame.assign(volume=frame.vol,
                         datetime=pd.to_datetime(frame.datetime))
            .query('volume>1')
            .drop_duplicates(['datetime', 'code'])
            .set_index('datetime', drop=False)
        )
    except:
        # Any failure above (for example an empty result without the
        # expected columns) degrades to "no data".
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    if format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    # other output formats
    if format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    if format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()

    print(
        "QA Error QA_fetch_index_transaction format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
        % format)
    return None
def QA_fetch_future_min(code, start, end, frequence='1min', format='numpy',
                        collections=DATABASE.future_min):
    """Fetch future minute bars from ``collections`` in the requested format.

    Arguments:
        code -- a code or list of codes; normalised with
            ``QA_util_code_tolist(code, auto_fill=False)``
        start, end -- converted with ``str`` and cut to 19 characters, then
            checked with ``QA_util_dateordatetime_valid`` and matched
            inclusively (via ``QA_util_time_stamp``) against ``time_stamp``
        frequence -- '1min', '5min', '15min', '30min', '60min' or the short
            forms '1m' ... '60m'; other values are used unchanged in the query
        format -- output format handed to ``QA_util_to_anyformat_from_pandas``
        collections -- collection to query (default ``DATABASE.future_min``)

    Returns:
        The result of ``QA_util_to_anyformat_from_pandas``, or ``None`` (after
        logging) when ``start``/``end`` are not valid.
    """
    frequence_names = (('1min', '1m'), ('5min', '5m'), ('15min', '15m'),
                       ('30min', '30m'), ('60min', '60m'))
    for full_name, short_name in frequence_names:
        if frequence in (full_name, short_name):
            frequence = full_name
            break

    start = str(start)[0:19]
    end = str(end)[0:19]
    code = QA_util_code_tolist(code, auto_fill=False)

    start_ok = QA_util_dateordatetime_valid(start)
    end_ok = QA_util_dateordatetime_valid(end)
    if not (start_ok & end_ok):
        QA_util_log_info(
            'QA Error QA_fetch_future_min data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    query = {
        'code': {'$in': code},
        "time_stamp": {
            "$gte": QA_util_time_stamp(start),
            "$lte": QA_util_time_stamp(end)
        },
        'type': frequence
    }
    cursor = collections.find(query, batch_size=10000)
    res = pd.DataFrame(list(cursor))

    # Data processing: sanity filtering, index and column selection are
    # delegated to the shared filter; the format is not changed here.
    res = __QA_fetch_query_filter(
        res, DATA_QUERY_INDEX_COLUMNS_UNIQUE.FUTURE_MIN, query=None)

    # Output formatting.
    return QA_util_to_anyformat_from_pandas(data=res, format=format)
def QA_save_tdx_to_mongo(file_dir, client=QA_Setting.client):
    """Import TDX minute-bar files found under ``file_dir`` into MongoDB.

    Walks ``file_dir`` with ``os.walk``; every file whose name starts with
    ``sh6``, ``sz0`` or ``sz3`` is read with ``TdxMinBarReader`` and its rows
    are inserted into ``client.quantaxis.stock_min_five``.

    Arguments:
        file_dir {str} -- directory to scan

    Keyword Arguments:
        client -- object exposing ``quantaxis.stock_min_five``
            (default: {QA_Setting.client})
    """
    reader = TdxMinBarReader()
    coll = client.quantaxis.stock_min_five
    for root, _dirs, files in os.walk(file_dir):
        for file in files:
            name = str(file)
            # Plain prefix test: the previous int(name[2]) raised ValueError /
            # IndexError on names such as 'shx...' or 'sh', aborting the
            # whole import.
            if name[0:3] not in ('sh6', 'sz0', 'sz3'):
                continue

            QA_util_log_info('Now_saving ' + name[2:8] + '\'s 5 min tick')
            # Join with the directory os.walk is currently visiting so files
            # in sub-directories resolve, and let os.path pick the separator
            # instead of a hard-coded backslash.
            fname = os.path.join(root, file)
            df = reader.get_df(fname)
            df['code'] = name[2:8]
            df['market'] = name[0:2]
            df['datetime'] = [str(x) for x in list(df.index)]
            df['date'] = [str(x)[0:10] for x in list(df.index)]
            df['time_stamp'] = df['datetime'].apply(
                lambda x: QA_util_time_stamp(x))
            df['date_stamp'] = df['date'].apply(
                lambda x: QA_util_date_stamp(x))
            data_json = json.loads(df.to_json(orient='records'))
            # insert_many rejects an empty document list; skip empty files.
            if data_json:
                coll.insert_many(data_json)
def QA_save_tdx_to_mongo(file_dir, client=DATABASE):
    """Import TDX minute-bar files found under ``file_dir`` into MongoDB.

    Walks ``file_dir`` with ``os.walk``; every file whose name starts with
    ``sh6``, ``sz0`` or ``sz3`` is read with ``TdxMinBarReader`` and its rows
    are inserted into ``client.stock_min_five``.

    Arguments:
        file_dir {str:direction} -- directory that holds the files

    Keyword Arguments:
        client {Mongodb:Connection} -- Mongo Connection (default: {DATABASE})
    """
    reader = TdxMinBarReader()
    coll = client.stock_min_five
    for root, _dirs, files in os.walk(file_dir):
        for file in files:
            name = str(file)
            # Plain prefix test: the previous int(name[2]) raised ValueError /
            # IndexError on names such as 'shx...' or 'sh', aborting the
            # whole import.
            if name[0:3] not in ('sh6', 'sz0', 'sz3'):
                continue

            QA_util_log_info('Now_saving ' + name[2:8] + '\'s 5 min tick')
            # Join with the directory os.walk is currently visiting so files
            # in sub-directories resolve (file_dir alone is only right for
            # the top level).
            fname = os.path.join(root, file)
            df = reader.get_df(fname)
            df['code'] = name[2:8]
            df['market'] = name[0:2]
            df['datetime'] = [str(x) for x in list(df.index)]
            df['date'] = [str(x)[0:10] for x in list(df.index)]
            df['time_stamp'] = df['datetime'].apply(
                lambda x: QA_util_time_stamp(x))
            df['date_stamp'] = df['date'].apply(
                lambda x: QA_util_date_stamp(x))
            data_json = json.loads(df.to_json(orient='records'))
            # insert_many rejects an empty document list; skip empty files.
            if data_json:
                coll.insert_many(data_json)
def QA_fetch_get_stock_min(code, start, end, frequence='1min',
                           ip=best_ip['stock'], port=7709):
    """Download stock minute bars through ``TdxHq_API``.

    Arguments:
        code -- stock code (converted with ``str`` for the request)
        start, end -- bounds used to slice the datetime-indexed result
        frequence -- '1'/'1m'/'1min'/'one', '5'/'5m'/'5min'/'five',
            '15'/'15m'/'15min'/'fifteen', '30'/'30m'/'30min'/'half' or
            '60'/'60m'/'60min'/'1h'; other values are passed to the API
            unchanged and leave the ``type`` column empty
        ip, port -- server passed to ``api.connect``

    Returns:
        DataFrame indexed by ``datetime`` with ``code``, ``date``,
        ``date_stamp``, ``time_stamp`` and ``type`` columns added and the
        ``datetime`` column rendered as strings.
    """
    api = TdxHq_API()
    type_ = ''
    start_date = str(start)[0:10]
    today_ = datetime.date.today()
    lens = QA_util_get_trade_gap(start_date, today_)

    # (aliases, category passed to the API, type label, factor applied to
    # the trade gap to get the number of bars to request)
    frequence_table = (
        (['5', '5m', '5min', 'five'], 0, '5min', 48),
        (['1', '1m', '1min', 'one'], 8, '1min', 240),
        (['15', '15m', '15min', 'fifteen'], 1, '15min', 16),
        (['30', '30m', '30min', 'half'], 2, '30min', 8),
        (['60', '60m', '60min', '1h'], 3, '60min', 4),
    )
    requested = str(frequence)
    for aliases, category, label, factor in frequence_table:
        if requested in aliases:
            frequence, type_ = category, label
            lens = factor * lens
            break

    # Never request more than 20800 bars.
    if lens > 20800:
        lens = 20800

    last_page = int(lens / 800)
    with api.connect(ip, port):
        # Pages of 800 bars, requested from the largest offset down to 0.
        raw = pd.concat([
            api.to_df(
                api.get_security_bars(frequence,
                                      _select_market_code(str(code)),
                                      str(code), (last_page - i) * 800, 800))
            for i in range(last_page + 1)
        ], axis=0)

        # The derived columns are computed from the datetime values as
        # returned by the API, not from the parsed ones.
        raw_datetime = raw['datetime']
        data = raw.assign(datetime=pd.to_datetime(raw_datetime),
                          code=str(code))
        data = data.drop(['year', 'month', 'day', 'hour', 'minute'],
                         axis=1, inplace=False)
        data = data.assign(
            date=raw_datetime.apply(lambda x: str(x)[0:10]),
            date_stamp=raw_datetime.apply(lambda x: QA_util_date_stamp(x)),
            time_stamp=raw_datetime.apply(lambda x: QA_util_time_stamp(x)),
            type=type_)
        data = data.set_index('datetime', drop=False, inplace=False)
        data = data[start:end]
        return data.assign(datetime=data['datetime'].apply(lambda x: str(x)))
def QA_fetch_get_security_bars(code, _type, lens, ip=best_ip['stock'],
                               port=7709):
    """Download the latest ``lens`` bars of ``code`` through ``TdxHq_API``.

    Arguments:
        code -- security code
        _type -- bar type; mapped with ``_select_type`` for the request and
            stored unchanged in the ``type`` column
        lens -- number of trailing rows to keep

    Keyword Arguments:
        ip -- server address passed to ``api.connect``
            (default: {best_ip['stock']})
        port -- server port (default: {7709})

    Returns:
        DataFrame indexed by ``datetime`` holding the last ``lens`` rows.
    """
    api = TdxHq_API()
    with api.connect(ip, port):
        # Pages of 800 bars at offsets 0, 800, ... up to int(lens / 800) * 800.
        pages = [
            api.to_df(
                api.get_security_bars(_select_type(_type),
                                      _select_market_code(code), code,
                                      page * 800, 800))
            for page in range(int(lens / 800) + 1)
        ]
        raw = pd.concat(pages, axis=0)

        # The derived columns are computed from the datetime values as
        # returned by the API, not from the parsed ones.
        raw_datetime = raw['datetime']
        data = raw.assign(datetime=pd.to_datetime(raw_datetime),
                          code=str(code))
        data = data.drop(['year', 'month', 'day', 'hour', 'minute'],
                         axis=1, inplace=False)
        data = data.assign(
            date=raw_datetime.apply(lambda x: str(x)[0:10]),
            date_stamp=raw_datetime.apply(lambda x: QA_util_date_stamp(x)),
            time_stamp=raw_datetime.apply(lambda x: QA_util_time_stamp(x)),
            type=_type)
        data = data.set_index('datetime', drop=False, inplace=False)
        return data.tail(lens)
def QA_fetch_get_index_min(code, start, end, level='1min', ip=best_ip,
                           port=7709):
    """Download index minute bars through ``TdxHq_API``.

    Arguments:
        code -- index code; codes starting with '5' or '1' are fetched with
            ``get_security_bars`` (marked "ETF" in the original), all others
            with ``get_index_bars``
        start, end -- bounds used to slice the datetime-indexed result
        level -- '1'/'1m'/'1min'/'one', '5'/'5m'/'5min'/'five',
            '15'/'15m'/'15min'/'fifteen', '30'/'30m'/'30min'/'half' or
            '60'/'60m'/'60min'/'1h'; other values are passed to the API
            unchanged and leave the ``type`` column empty
        ip, port -- server passed to ``api.connect``

    Returns:
        DataFrame indexed by ``datetime`` with ``code``, ``date``,
        ``date_stamp``, ``time_stamp`` and ``type`` columns added and the
        ``datetime`` column rendered as strings.
    """
    api = TdxHq_API()
    type_ = ''

    # (aliases, category passed to the API, type label)
    level_table = (
        (['5', '5m', '5min', 'five'], 0, '5min'),
        (['1', '1m', '1min', 'one'], 8, '1min'),
        (['15', '15m', '15min', 'fifteen'], 1, '15min'),
        (['30', '30m', '30min', 'half'], 2, '30min'),
        (['60', '60m', '60min', '1h'], 3, '60min'),
    )
    requested = str(level)
    for aliases, category, label in level_table:
        if requested in aliases:
            level, type_ = category, label
            break

    with api.connect(ip, port):
        first_char = str(code)[0]
        market = 1 if first_char in ['0', '8', '9', '5'] else 0
        if first_char in ['5', '1']:
            # ETF
            fetch_bars = api.get_security_bars
        else:
            fetch_bars = api.get_index_bars

        # 26 pages of 800 bars, from offset 20000 down to 0.
        raw = pd.concat([
            api.to_df(fetch_bars(level, market, code, page * 800, 800))
            for page in range(25, -1, -1)
        ], axis=0)

        # The derived columns are computed from the datetime values as
        # returned by the API, not from the parsed ones.
        raw_datetime = raw['datetime']
        data = raw.assign(datetime=pd.to_datetime(raw_datetime),
                          code=str(code))
        data = data.drop(['year', 'month', 'day', 'hour', 'minute'],
                         axis=1, inplace=False)
        data = data.assign(
            date=raw_datetime.apply(lambda x: str(x)[0:10]),
            date_stamp=raw_datetime.apply(lambda x: QA_util_date_stamp(x)),
            time_stamp=raw_datetime.apply(lambda x: QA_util_time_stamp(x)),
            type=type_)
        data = data.set_index('datetime', drop=False, inplace=False)
        data = data[start:end]
        return data.assign(datetime=data['datetime'].apply(lambda x: str(x)))
def format_stock_data(self, item):
    """Convert an incoming 5-minute bar into the stored record layout.

    The stock pool is read from ``/root/sim/stock_strategy/stock_pool.csv``
    on every call. ``item`` is only converted when the last six characters of
    its ``code`` are in that pool and its ``frequence`` is ``'5min'``.

    :param item: mapping with ``code``, ``frequence``, ``open``, ``high``,
        ``close``, ``low``, ``volume`` and ``datetime`` entries
    :return: the converted dict, or an empty dict when ``item`` is filtered
        out
    """
    raw_code = item.get('code')
    new_code = raw_code[-6:]

    import pandas as pd
    stock_pool_pd = pd.read_csv("/root/sim/stock_strategy/stock_pool.csv",
                                encoding='utf-8',
                                converters={'code': str})
    stock_pool_list = stock_pool_pd['code'].tolist()

    if new_code not in stock_pool_list or item.get('frequence') != '5min':
        return {}

    return {
        'code': new_code,
        'open': item.get('open'),
        'high': item.get('high'),
        'close': item.get('close'),
        'low': item.get('low'),
        'vol': item.get('volume'),
        'type': item.get('frequence'),
        'date_stamp': QA_util_date_stamp(item.get('datetime')),
        'time_stamp': QA_util_time_stamp(item.get('datetime')),
        'date': item.get('datetime')[0:10],       # e.g. 2020-10-12
        'datetime': item.get('datetime'),         # e.g. 2020-10-12 10:02:00
        'tradetime': item.get('datetime')[0:16],  # e.g. 2020-10-12 10:02
    }
def __transform_gm_to_qa(file_path: str = None, end_time: str = None,
                         type_="1min"):
    """Load a CSV file in the "gm" (Juejin) layout and reshape it.

    1. Expected input columns (per the original notes):

        amount, bob, close, eob, frequency, high, low, open, position,
        pre_close, symbol, volume

       e.g. eob = ``2018-08-16 09:31:00+08:00``, symbol = ``SHSE.600000``

    2. The output is meant to match what
       ``QUANTAXIS.QAFetch.QATdx.QA_fetch_get_stock_min`` produces:

        open, close, high, low, vol, amount, datetime, code, date,
        date_stamp, time_stamp, type   (indexed by datetime)

    :param file_path: path of the CSV file; ``ValueError`` if ``None``
    :param end_time: upper bound for the datetime index slice
    :param type_: value written to the ``type`` column
    """
    if file_path is None:
        raise ValueError("输入文件地址")

    # Column names: eob -> datetime, volume -> vol, symbol -> code.
    frame = pd.read_csv(file_path).rename(columns={
        "eob": "datetime",
        "volume": "vol",
        "symbol": "code"
    }).drop(["bob", "frequency", "position", "pre_close"], axis=1)

    # Value formats: drop the first 5 characters of the symbol and
    # re-localise the first 19 characters of the timestamp to Asia/Shanghai.
    frame["code"] = frame["code"].map(str).str.slice(5, )
    local_time = pd.to_datetime(
        frame["datetime"].map(str).str.slice(0, 19)
    ).dt.tz_localize(None).dt.tz_localize('Asia/Shanghai')
    frame = frame.assign(
        datetime=local_time,
        date=local_time.map(str).str.slice(0, 10),
    ).set_index("datetime", drop=False)

    frame = frame.assign(
        date_stamp=frame["date"].apply(lambda x: QA_util_date_stamp(x)),
        time_stamp=frame["datetime"].map(str).apply(
            lambda x: QA_util_time_stamp(x)),
        type=type_,
    )

    frame = frame.loc[:end_time]
    frame = frame.assign(datetime=frame["datetime"].map(str), type=type_)

    output_columns = [
        "open", "close", "high", "low", "vol", "amount", "datetime", "code",
        "date", "date_stamp", "time_stamp", "type",
    ]
    return frame[output_columns]
def QA_fetch_get_stock_min(code, start, end, level, ip='221.231.141.60',
                           port=7709):
    """Download stock minute bars through ``TdxHq_API``.

    Arguments:
        code -- stock code
        start, end -- bounds used to slice the datetime-indexed result
        level -- '1'/'1m'/'1min'/'one', '5'/'5m'/'5min'/'five',
            '15'/'15m'/'15min'/'fifteen', '30'/'30m'/'30min'/'half' or
            '60'/'60m'/'60min'/'1h'; other values are passed to the API
            unchanged
        ip, port -- server passed to ``api.connect``

    Returns:
        DataFrame indexed by ``datetime`` with ``code``, ``date``,
        ``date_stamp`` and ``time_stamp`` columns added and the ``datetime``
        column rendered as 19-character strings.
    """
    api = TdxHq_API()
    market_code = __select_market_code(code)

    # (aliases, category passed to the API)
    level_table = (
        (['5', '5m', '5min', 'five'], 0),
        (['1', '1m', '1min', 'one'], 8),
        (['15', '15m', '15min', 'fifteen'], 1),
        (['30', '30m', '30min', 'half'], 2),
        (['60', '60m', '60min', '1h'], 3),
    )
    requested = str(level)
    for aliases, category in level_table:
        if requested in aliases:
            level = category
            break

    with api.connect(ip, port):
        # 26 pages of 800 bars, from offset 20000 down to 0.
        bars = []
        for offset in range(25 * 800, -1, -800):
            bars.extend(
                api.get_security_bars(level, market_code, code, offset, 800))

        data = api.to_df(bars)
        data = data.assign(datetime=pd.to_datetime(data['datetime']),
                           code=code)
        data = data.set_index('datetime', drop=False)
        data = data.drop(['year', 'month', 'day', 'hour', 'minute'], axis=1)

        datetime_text = data['datetime'].apply(lambda x: str(x)[0:19])
        date_text = datetime_text.apply(lambda x: str(x)[0:10])
        data = data.assign(
            datetime=datetime_text,
            date=date_text,
            date_stamp=date_text.apply(lambda x: QA_util_date_stamp(x)),
            time_stamp=datetime_text.apply(lambda x: QA_util_time_stamp(x)))
        return data[start:end]
def __transform_ss_to_qa(file_path: str = None, end_time: str = None,
                         type_="1min"):
    """Load a CSV file in the "ss" (TinySoft) layout and reshape it.

    1. Expected input columns (per the original notes):

        time, symbol, open, high, low, close, volume, amount

       e.g. time = ``2013-08-01 09:31:00``, symbol = ``SH600000``

    2. The output is meant to match what
       ``QUANTAXIS.QAFetch.QATdx.QA_fetch_get_stock_min`` produces:

        open, close, high, low, vol, amount, datetime, code, date,
        date_stamp, time_stamp, type   (indexed by datetime)

    :param file_path: path of the CSV file; ``ValueError`` if ``None``
    :param end_time: upper bound for the datetime index slice
    :param type_: value written to the ``type`` column
    """
    if file_path is None:
        raise ValueError("输入文件地址")

    # Column names: time -> datetime, volume -> vol.
    frame = pd.read_csv(file_path).rename(columns={
        "time": "datetime",
        "volume": "vol"
    })

    # Value formats: drop the first 2 characters of the symbol; the date is
    # the first 10 characters of the timestamp text.
    raw_time = frame["datetime"]
    frame["code"] = frame["symbol"].map(str).str.slice(2)
    frame["date"] = raw_time.map(str).str.slice(0, 10)
    frame = frame.drop("symbol", axis=1)

    # time_stamp is derived from the timestamp values as read from the file,
    # before they are parsed and localised.
    local_time = pd.to_datetime(raw_time).dt.tz_localize(
        None).dt.tz_localize('Asia/Shanghai')
    date_stamp = frame["date"].apply(lambda x: QA_util_date_stamp(x))
    time_stamp = raw_time.apply(lambda x: QA_util_time_stamp(x))
    frame = frame.assign(
        datetime=local_time,
        date_stamp=date_stamp,
        time_stamp=time_stamp,
    ).set_index("datetime", drop=False)

    frame = frame.loc[:end_time]
    frame = frame.assign(datetime=frame["datetime"].map(str), type=type_)

    output_columns = [
        "open", "close", "high", "low", "vol", "amount", "datetime", "code",
        "date", "date_stamp", "time_stamp", "type",
    ]
    return frame[output_columns]
def getMin(cls, code, start, end, if_fq='00', frequence=8):
    """Fetch stock minute bars from ``cls.collectionsMin``.

    Args:
        code: a code or list of codes; normalised by ``QA_util_code_tolist``
        start, end: bounds converted with ``QA_util_time_stamp`` and matched
            inclusively against the ``time_stamp`` field
        if_fq: not used by this method
        frequence: value handed to ``cls.getReverseFrequence``; the second
            element of its result is matched against the ``type`` field

    Returns:
        The bars formatted according to ``cls.format``
        ('P'/'p'/'pandas'/'pd', 'json'/'dict', 'n'/'N'/'numpy' or
        'list'/'l'/'L'). If post-processing the query result raises, the data
        becomes ``None`` and ``None`` is what gets formatted. An unrecognised
        ``cls.format`` prints an error and returns ``None``.
    """
    collections = cls.collectionsMin
    _category, type_, _factor = cls.getReverseFrequence(frequence)

    # code checking
    code = QA_util_code_tolist(code)
    query = {
        'code': {'$in': code},
        "time_stamp": {
            "$gte": QA_util_time_stamp(start),
            "$lte": QA_util_time_stamp(end)
        },
        'type': type_
    }
    cursor = collections.find(query, {"_id": 0}, batch_size=10000)
    frame = pd.DataFrame(list(cursor))

    try:
        res = (
            frame.assign(volume=frame.vol,
                         datetime=pd.to_datetime(frame.datetime))
            .query('volume>1')
            .drop_duplicates(['datetime', 'code'])
            .set_index('datetime', drop=False)
        )
    except:
        # Any failure above (for example an empty result without the
        # expected columns) degrades to "no data".
        res = None

    output_format = cls.format
    if output_format in ['P', 'p', 'pandas', 'pd']:
        return res
    if output_format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    # other output formats
    if output_format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    if output_format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()

    print(
        "QA Error QA_fetch_stock_min format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
        % output_format)
    return None
def QA_fetch_future_min(
        code,
        start, end,
        format='numpy',
        frequence='1min',
        collections=DATABASE.future_min):
    """Fetch future minute bars from ``collections`` in the requested format.

    Arguments:
        code -- a code or list of codes; normalised with
            ``QA_util_code_tolist(code, auto_fill=False)``
        start, end -- bounds converted with ``QA_util_time_stamp`` and matched
            inclusively against the ``time_stamp`` field
        format -- 'dict'/'json' (raw documents), 'numpy'/'np'/'n',
            'list'/'l'/'L' or 'P'/'p'/'pandas'/'pd'
        frequence -- '1min', '5min', '15min', '30min', '60min' or the short
            forms '1m' ... '60m'; other values are used unchanged in the query
        collections -- collection to query (default ``DATABASE.future_min``)

    Returns:
        For 'dict'/'json' the list of documents as returned by the query;
        otherwise the bars, de-duplicated on (datetime, code), as numpy array,
        nested list or DataFrame indexed by ``datetime``. ``None`` when
        ``format`` is not recognised.
    """
    frequence_names = (('1min', '1m'), ('5min', '5m'), ('15min', '15m'),
                       ('30min', '30m'), ('60min', '60m'))
    for full_name, short_name in frequence_names:
        if frequence in (full_name, short_name):
            frequence = full_name
            break

    code = QA_util_code_tolist(code, auto_fill=False)
    cursor = collections.find({
        'code': {'$in': code},
        "time_stamp": {
            "$gte": QA_util_time_stamp(start),
            "$lte": QA_util_time_stamp(end)
        },
        'type': frequence
    }, batch_size=10000)

    # Raw documents are returned untouched for dict/json.
    if format in ['dict', 'json']:
        return list(cursor)

    # `position` and `price` default to 0 when missing; `trade` falls back to
    # `volume` and then to 0.
    rows = [
        [
            str(item['code']),
            float(item['open']),
            float(item['high']),
            float(item['low']),
            float(item['close']),
            float(item.get('position', 0)),
            float(item.get('price', 0)),
            float(item.get('trade', item.get('volume', 0))),
            item['datetime'],
            item['tradetime'],
            item['time_stamp'],
            item['date'],
            item['type'],
        ]
        for item in cursor
    ]
    frame = DataFrame(rows, columns=[
        'code', 'open', 'high', 'low', 'close', 'position', 'price', 'trade',
        'datetime', 'tradetime', 'time_stamp', 'date', 'type'
    ])
    frame = frame.assign(datetime=pd.to_datetime(frame['datetime']))
    frame = frame.drop_duplicates((['datetime', 'code']))
    frame = frame.set_index('datetime', drop=False)

    if format in ['numpy', 'np', 'n']:
        return numpy.asarray(frame)
    if format in ['list', 'l', 'L']:
        return numpy.asarray(frame).tolist()
    if format in ['P', 'p', 'pandas', 'pd']:
        return frame
def QA_fetch_get_index_min(code, start, end, frequence='1min', ip=None,
                           port=None):
    """Download index minute bars through ``TdxHq_API``.

    Arguments:
        code -- index code; codes starting with '5' or '1' are fetched with
            ``get_security_bars`` (marked "ETF" in the original), all others
            with ``get_index_bars``
        start, end -- bounds used to slice the datetime-indexed result
        frequence -- '1'/'1m'/'1min'/'one', '5'/'5m'/'5min'/'five',
            '15'/'15m'/'15min'/'fifteen', '30'/'30m'/'30min'/'half' or
            '60'/'60m'/'60min'/'1h'; other values are passed to the API
            unchanged and leave the ``type`` column empty
        ip, port -- server to connect to. When both are ``None`` they come
            from the module-level ``best_ip['stock']``, which is first
            refreshed with ``select_best_ip()`` if its ip and port are both
            ``None``.

    Returns:
        DataFrame indexed by ``datetime`` with ``code``, ``date``,
        ``date_stamp``, ``time_stamp`` and ``type`` columns added and the
        ``datetime`` column rendered as strings.
    """
    global best_ip
    if ip is None and port is None:
        cached = best_ip['stock']
        if cached['ip'] is None and cached['port'] is None:
            best_ip = select_best_ip()
            ip = best_ip['stock']['ip']
            port = best_ip['stock']['port']
        elif cached['ip'] is not None and cached['port'] is not None:
            ip = cached['ip']
            port = cached['port']

    api = TdxHq_API()
    type_ = ''
    start_date = str(start)[0:10]
    today_ = datetime.date.today()
    lens = QA_util_get_trade_gap(start_date, today_)

    # (aliases, category passed to the API, type label, factor applied to
    # the trade gap to get the number of bars to request)
    frequence_table = (
        (['5', '5m', '5min', 'five'], 0, '5min', 48),
        (['1', '1m', '1min', 'one'], 8, '1min', 240),
        (['15', '15m', '15min', 'fifteen'], 1, '15min', 16),
        (['30', '30m', '30min', 'half'], 2, '30min', 8),
        (['60', '60m', '60min', '1h'], 3, '60min', 4),
    )
    requested = str(frequence)
    for aliases, category, label, factor in frequence_table:
        if requested in aliases:
            frequence, type_ = category, label
            lens = factor * lens
            break

    # Never request more than 20800 bars.
    if lens > 20800:
        lens = 20800

    last_page = int(lens / 800)
    with api.connect(ip, port):
        first_char = str(code)[0]
        market = 1 if first_char in ['0', '8', '9', '5'] else 0
        if first_char in ['5', '1']:
            # ETF
            fetch_bars = api.get_security_bars
        else:
            fetch_bars = api.get_index_bars

        # Pages of 800 bars, requested from the largest offset down to 0.
        raw = pd.concat([
            api.to_df(
                fetch_bars(frequence, market, code, (last_page - i) * 800,
                           800))
            for i in range(last_page + 1)
        ], axis=0)

        # The derived columns are computed from the datetime values as
        # returned by the API, not from the parsed ones. The `code` column
        # ends up holding `code` exactly as passed in.
        raw_datetime = raw['datetime']
        data = raw.assign(datetime=pd.to_datetime(raw_datetime),
                          code=str(code))
        data = data.drop(['year', 'month', 'day', 'hour', 'minute'],
                         axis=1, inplace=False)
        data = data.assign(
            code=code,
            date=raw_datetime.apply(lambda x: str(x)[0:10]),
            date_stamp=raw_datetime.apply(lambda x: QA_util_date_stamp(x)),
            time_stamp=raw_datetime.apply(lambda x: QA_util_time_stamp(x)),
            type=type_)
        data = data.set_index('datetime', drop=False, inplace=False)
        data = data[start:end]
        return data.assign(datetime=data['datetime'].apply(lambda x: str(x)))
def QA_DataAggrement_Stock_min(data):
    """Normalise a stock minute-bar frame to the common column layout.

    The index is moved into a column; ``index`` is renamed to ``datetime``
    and ``money`` to ``amount``. ``date``, ``date_stamp`` and ``time_stamp``
    are derived from ``datetime`` and ``source`` is set to
    ``DATASOURCE.JQDATA``.

    :param data: DataFrame to normalise
    :return: the normalised DataFrame
    """
    frame = data.reset_index().rename(columns={
        'index': 'datetime',
        'money': 'amount'
    })
    moments = frame['datetime']
    return frame.assign(
        date=moments.apply(lambda x: str(x)[0:10]),
        date_stamp=moments.apply(lambda x: QA_util_date_stamp(x)),
        time_stamp=moments.apply(lambda x: QA_util_time_stamp(x)),
        source=DATASOURCE.JQDATA,
    )
def QA_fetch_get_future_min(code, start, end, frequence='1min',
                            ip=best_ip['future'], port=7727):
    """Download future minute bars through ``TdxExHq_API``.

    Arguments:
        code -- future code; its market is looked up in the module-level
            ``extension_market_info`` (loaded with
            ``QA_fetch_get_future_list()`` when it is still ``None``)
        start, end -- bounds used to slice the datetime-indexed result
        frequence -- '1'/'1m'/'1min'/'one', '5'/'5m'/'5min'/'five',
            '15'/'15m'/'15min'/'fifteen', '30'/'30m'/'30min'/'half' or
            '60'/'60m'/'60min'/'1h'; other values are passed to the API
            unchanged and leave the ``type`` column empty
        ip, port -- server passed to ``apix.connect``

    Returns:
        DataFrame indexed by ``datetime`` with ``code``, ``date``,
        ``date_stamp``, ``time_stamp`` and ``type`` columns added and the
        ``datetime`` column rendered as strings.
    """
    global extension_market_info

    apix = TdxExHq_API()
    type_ = ''
    start_date = str(start)[0:10]
    today_ = datetime.date.today()
    lens = QA_util_get_trade_gap(start_date, today_)

    if extension_market_info is None:
        extension_market_info = QA_fetch_get_future_list()

    # (aliases, category passed to the API, type label, factor applied to
    # the trade gap to get the number of bars to request)
    frequence_table = (
        (['5', '5m', '5min', 'five'], 0, '5min', 48),
        (['1', '1m', '1min', 'one'], 8, '1min', 240),
        (['15', '15m', '15min', 'fifteen'], 1, '15min', 16),
        (['30', '30m', '30min', 'half'], 2, '30min', 8),
        (['60', '60m', '60min', '1h'], 3, '60min', 4),
    )
    requested = str(frequence)
    for aliases, category, label, factor in frequence_table:
        if requested in aliases:
            frequence, type_ = category, label
            lens = factor * lens
            break

    # Never request more than 20800 bars.
    if lens > 20800:
        lens = 20800

    last_page = int(lens / 700)
    with apix.connect(ip, port):
        code_market = extension_market_info.query('code=="{}"'.format(code))
        # Pages of 700 bars, requested from the largest offset down to 0.
        raw = pd.concat([
            apix.to_df(
                apix.get_instrument_bars(frequence, int(code_market.market),
                                         str(code), (last_page - i) * 700,
                                         700))
            for i in range(last_page + 1)
        ], axis=0)

        # The derived columns are computed from the datetime values as
        # returned by the API, not from the parsed ones.
        raw_datetime = raw['datetime']
        data = raw.assign(datetime=pd.to_datetime(raw_datetime),
                          code=str(code))
        data = data.drop(['year', 'month', 'day', 'hour', 'minute'],
                         axis=1, inplace=False)
        data = data.assign(
            date=raw_datetime.apply(lambda x: str(x)[0:10]),
            date_stamp=raw_datetime.apply(lambda x: QA_util_date_stamp(x)),
            time_stamp=raw_datetime.apply(lambda x: QA_util_time_stamp(x)),
            type=type_)
        data = data.set_index('datetime', drop=False, inplace=False)
        data = data[start:end]
        return data.assign(datetime=data['datetime'].apply(lambda x: str(x)))
def QA_fetch_stock_min(code, startTime, endTime, type_='numpy',
                       collections=QA_Setting.client.quantaxis.stock_min_five):
    """Fetch minute bars of a single stock code.

    Arguments:
        code -- stock code; converted with ``str`` for the query
        startTime, endTime -- bounds converted with ``QA_util_time_stamp`` and
            matched inclusively against the ``time_stamp`` field
        type_ -- output format: 'numpy', 'list' or 'pandas'
        collections -- collection to query

    Returns:
        Rows of ``[code, open, high, low, close, volume, date]`` as a numpy
        array, a list of lists or a DataFrame; ``None`` when ``type_`` is not
        one of the three supported values.
    """
    # The range is applied to the numeric `time_stamp` field with
    # start <= time_stamp <= end. The earlier filter compared the textual
    # `datetime` field against time stamps and had the two bounds swapped,
    # so it could not select the requested window.
    cursor = collections.find({
        'code': str(code),
        "time_stamp": {
            "$gte": QA_util_time_stamp(startTime),
            "$lte": QA_util_time_stamp(endTime)
        }
    })
    rows = [
        [
            str(item['code']),
            float(item['open']),
            float(item['high']),
            float(item['low']),
            float(item['close']),
            float(item['volume']),
            item['date'],
        ]
        for item in cursor
    ]

    if type_ == 'numpy':
        return numpy.asarray(rows)
    if type_ == 'list':
        return rows
    if type_ == 'pandas':
        return DataFrame(rows, columns=[
            'code', 'open', 'high', 'low', 'close', 'volume', 'date'
        ])
    # Unknown format: return None instead of failing on an unbound local.
    return None
def QA_DataAggrement_Future_min(data):
    """Normalise a future minute-bar frame to the common column layout.

    Missing ``contract`` / ``position`` columns are added to ``data`` itself
    (``'uf'`` and ``0``). The index is then moved into a column; ``index`` is
    renamed to ``datetime`` and ``money`` to ``amount``. ``date``,
    ``tradetime``, ``date_stamp`` and ``time_stamp`` are derived from
    ``datetime`` and ``source`` is set to ``DATASOURCE.JQDATA``.

    :param data: DataFrame to normalise; may gain the two default columns
    :return: the normalised DataFrame
    """
    # Defaults are written into the caller's frame, as before.
    for column, default in (('contract', 'uf'), ('position', 0)):
        if column not in data.columns:
            data[column] = default

    frame = data.reset_index().rename(columns={
        'index': 'datetime',
        'money': 'amount'
    })
    moments = frame['datetime']
    return frame.assign(
        date=moments.apply(lambda x: str(x)[0:10]),
        tradetime=pd.to_datetime(
            moments.apply(QA_util_future_to_tradedatetime)),
        date_stamp=moments.apply(lambda x: QA_util_date_stamp(x)),
        time_stamp=moments.apply(lambda x: QA_util_time_stamp(x)),
        source=DATASOURCE.JQDATA,
    )
def getMin(cls, code, start, end, if_fq='00', frequence=8):
    """Download minute-level bars through ``cls.tdxapi``.

    Args:
        code: security code (the original notes call for a 6 character str)
        start: start date; only the first 10 characters of ``str(start)`` are
            used, e.g. '2017-01-01'
        end: end date used to slice the result, e.g. '2018-01-01'
        if_fq: not used by this method
        frequence: category passed to ``get_security_bars`` and to
            ``cls.getReverseFrequence``, whose result supplies the ``type``
            label and the factor applied to the trade gap

    Returns:
        pd.DataFrame indexed by ``datetime`` with the ``datetime`` column
        rendered as strings. The original notes mention ``None`` for error
        cases such as a single suspended day; no such path is visible in
        this method.
    """
    start = str(start)[0:10]
    today_ = datetime.date.today()
    lens = QA_util_get_trade_gap(start, today_)
    _category, type_, multiplicator = cls.getReverseFrequence(frequence)
    lens = lens * multiplicator

    # Never request more than 20800 bars.
    if lens > 20800:
        lens = 20800

    last_page = int(lens / 800)
    with cls.tdxapi.connect(cls.ip, cls.port) as api:
        # Pages of 800 bars, requested from the largest offset down to 0.
        raw = pd.concat([
            api.to_df(
                api.get_security_bars(frequence,
                                      _select_market_code(str(code)),
                                      str(code), (last_page - i) * 800, 800))
            for i in range(last_page + 1)
        ], axis=0, sort=False)

        # The derived columns are computed from the datetime values as
        # returned by the API, not from the parsed ones.
        raw_datetime = raw['datetime']
        data = raw.drop(['year', 'month', 'day', 'hour', 'minute'],
                        axis=1, inplace=False)
        data = data.assign(
            datetime=pd.to_datetime(raw_datetime),
            code=str(code),
            date=raw_datetime.apply(lambda x: str(x)[0:10]),
            date_stamp=raw_datetime.apply(lambda x: QA_util_date_stamp(x)),
            time_stamp=raw_datetime.apply(lambda x: QA_util_time_stamp(x)),
            type=type_)
        data = data.set_index('datetime', drop=False, inplace=False)
        data = data[start:end]
        return data.assign(
            datetime=data['datetime'].apply(lambda x: str(x)))
def __transform_jq_to_qa(df, code, type_):
    """Reshape jqdata minute bars into the QA column layout.

    1. Expected input (per the original notes), indexed by datetime:

        open, close, high, low, volume, money

    2. The output is meant to match what
       ``QUANTAXIS.QAFetch.QATdx.QA_fetch_get_stock_min`` produces:

        open, close, high, low, vol, amount, datetime, code, date,
        date_stamp, time_stamp, type   (indexed by datetime)

    :param df: jqdata frame; ``ValueError`` when ``None`` or empty
    :param code: value written to the ``code`` column
    :param type_: value written to the ``type`` column
    """
    if df is None or len(df) == 0:
        raise ValueError("没有聚宽数据")

    frame = df.reset_index().rename(columns={
        "index": "datetime",
        "volume": "vol",
        "money": "amount"
    })
    frame = frame.assign(
        code=code,
        date=frame["datetime"].map(str).str.slice(0, 10),
    ).set_index("datetime", drop=False)

    frame = frame.assign(
        date_stamp=frame["date"].apply(lambda x: QA_util_date_stamp(x)),
        time_stamp=frame["datetime"].map(str).apply(
            lambda x: QA_util_time_stamp(x)),
        type=type_,
    )

    output_columns = [
        "open", "close", "high", "low", "vol", "amount", "datetime", "code",
        "date", "date_stamp", "time_stamp", "type",
    ]
    return frame[output_columns]