def QA_fetch_future_day(code,
                        start,
                        end,
                        format='numpy',
                        collections=DATABASE.future_day):
    """Fetch daily futures bars for one or more contract codes.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist(code, auto_fill=False)``.
        start: Start date; only the first 10 characters of ``str(start)``
            are used.
        end: End date, truncated the same way as ``start``.
        format: Output format name forwarded to
            ``QA_util_to_anyformat_from_pandas``.
        collections: Collection to query (default ``DATABASE.future_day``).

    Returns:
        The filtered result converted by ``QA_util_to_anyformat_from_pandas``,
        or ``None`` when either date fails validation.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]
    code = QA_util_code_tolist(code, auto_fill=False)

    start_ok = QA_util_dateordatetime_valid(start)
    end_ok = QA_util_dateordatetime_valid(end)
    if not (start_ok & end_ok):
        QA_util_log_info(
            'QA Error QA_fetch_future_day data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    date_range = {
        "$lte": QA_util_date_stamp(end),
        "$gte": QA_util_date_stamp(start)
    }
    query = {'code': {'$in': code}, "date_stamp": date_range}
    cursor = collections.find(query, {"_id": 0}, batch_size=10000)
    frame = pd.DataFrame(list(cursor))

    # Data cleaning only: anomaly checks, index setup and column selection
    # are delegated to the shared filter helper.
    frame = __QA_fetch_query_filter(
        frame, DATA_QUERY_INDEX_COLUMNS_UNIQUE.FUTURE_DAY, query=None)

    # Convert to the requested output format.
    return QA_util_to_anyformat_from_pandas(data=frame, format=format)
async def QA_fetch_stock_day(code,
                             start,
                             end,
                             format='numpy',
                             frequence='day',
                             collections=DATABASE_ASYNC.stock_day):
    """Fetch stock daily bars asynchronously.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        start: Start date; only the first 10 characters of ``str(start)``
            are used.
        end: End date, truncated the same way. Only ``end`` is validated.
        format: One of ``'P'/'p'/'pandas'/'pd'``, ``'json'/'dict'``,
            ``'n'/'N'/'numpy'`` or ``'list'/'l'/'L'``.
        frequence: Accepted for signature compatibility; not used here.
        collections: Async collection to query
            (default ``DATABASE_ASYNC.stock_day``).

    Returns:
        The data in the requested format. The DataFrame form is indexed by
        ``date`` and restricted to code/open/high/low/close/volume/amount/
        date. ``None`` is returned for an invalid ``end`` or an unknown
        ``format``; if post-processing fails the frame itself becomes
        ``None`` before conversion.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]
    code = QA_util_code_tolist(code)

    if not QA_util_date_valid(end):
        QA_util_log_info(
            'QA Error QA_fetch_stock_day data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    cursor = collections.find({
        'code': {
            '$in': code
        },
        "date_stamp": {
            "$lte": QA_util_date_stamp(end),
            "$gte": QA_util_date_stamp(start)
        }
    })
    # The former ``except SyntaxError`` wrapper was dead code: a SyntaxError
    # for ``async for`` is raised at compile time, never here, and the
    # handler would have left ``res`` unbound.
    res = pd.DataFrame([item async for item in cursor])

    try:
        res = res.drop(
            '_id', axis=1).assign(volume=res.vol).query('volume>1').assign(
                date=pd.to_datetime(res.date)).drop_duplicates(
                    (['date', 'code'])).set_index('date', drop=False)
        # ``.ix`` was removed in pandas 1.0; ``.loc`` is the label-based
        # replacement. With ``.ix`` this block always fell into the except
        # branch on modern pandas and silently returned None.
        res = res.loc[:, [
            'code', 'open', 'high', 'low', 'close', 'volume', 'amount',
            'date'
        ]]
    except Exception:
        # Best effort: an empty or malformed result yields None.
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        print(
            "QA Error QA_fetch_stock_day format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
            % format)
        return None
def QA_fetch_index_day(code,
                       start,
                       end,
                       format='numpy',
                       collections=DATABASE.index_day):
    """Fetch index daily bars.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        start: Start date; only the first 10 characters of ``str(start)``
            are used.
        end: End date, truncated the same way. Only ``end`` is validated.
        format: ``'dict'/'json'`` returns the raw documents;
            ``'n'/'N'/'numpy'`` a numpy array; ``'P'/'p'/'pandas'/'pd'`` a
            DataFrame indexed by ``date``; ``'list'/'l'/'L'`` or anything
            else the plain list of rows.
        collections: Collection to query (default ``DATABASE.index_day``).

    Returns:
        The data in the requested format, or ``None`` when ``end`` is not a
        valid date.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]
    code = QA_util_code_tolist(code)

    if not (QA_util_date_valid(end) == True):
        QA_util_log_info('something wrong with date')
        return None

    cursor = collections.find({
        'code': {'$in': code},
        "date_stamp": {
            "$lte": QA_util_date_stamp(end),
            "$gte": QA_util_date_stamp(start)
        }
    })

    if format in ['dict', 'json']:
        return list(cursor)

    # One row per document: code, OHLC, volume (stored as 'vol'), date.
    rows = [
        [
            str(doc['code']),
            float(doc['open']),
            float(doc['high']),
            float(doc['low']),
            float(doc['close']),
            float(doc['vol']),
            doc['date'],
        ]
        for doc in cursor
    ]

    if format in ['n', 'N', 'numpy']:
        return numpy.asarray(rows)
    if format in ['P', 'p', 'pandas', 'pd']:
        frame = DataFrame(rows, columns=[
            'code', 'open', 'high', 'low', 'close', 'volume', 'date'])
        frame['date'] = pd.to_datetime(frame['date'])
        return frame.set_index('date', drop=False)
    # 'list' formats and unrecognised formats both yield the raw row list.
    return rows
def QA_count_eastmoney_stock_xjlc_record_count_one_by_one(
        self, str_stock_code, collections=DATABASE.eastmoney_stock_zjlx):
    """Count stored records for a stock and report their date span.

    Args:
        str_stock_code: Stock code (or codes) passed to
            ``QA_util_code_tolist`` and matched against ``stock_code``.
        collections: Collection to query
            (default ``DATABASE.eastmoney_stock_zjlx``).

    Returns:
        ``[str_stock_code, record_count, first_date, last_date]`` where the
        dates are the ``date`` field of the earliest and latest matching
        documents, or ``None`` when nothing matches.
    """
    codes = QA_util_code_tolist(str_stock_code)
    query = {'stock_code': {'$in': codes}}

    # Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0;
    # count_documents() is its replacement.
    record_count = collections.count_documents(query)

    first_date = None
    last_date = None
    if record_count > 0:
        # Ask the server for the two boundary documents directly instead of
        # indexing into a sorted cursor, which issued a skip query per index.
        first_rec = collections.find_one(query, sort=[('date', 1)])
        last_rec = collections.find_one(query, sort=[('date', -1)])
        # Guard against documents vanishing between the count and the reads.
        if first_rec is not None:
            first_date = first_rec['date']
        if last_rec is not None:
            last_date = last_rec['date']

    # Result layout: [code, record count, start date, end date]
    return [str_stock_code, record_count, first_date, last_date]
def QA_fetch_stock_min(code,
                       start,
                       end,
                       format='numpy',
                       frequence='1min',
                       collections=DATABASE.stock_min):
    """Fetch stock minute bars.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        start: Start time, converted with ``QA_util_time_stamp``.
        end: End time, converted with ``QA_util_time_stamp``.
        format: One of ``'P'/'p'/'pandas'/'pd'``, ``'json'/'dict'``,
            ``'n'/'N'/'numpy'`` or ``'list'/'l'/'L'``.
        frequence: ``'1min'``/``'1m'`` ... ``'60min'``/``'60m'``. An
            unrecognised value prints an error and is then used as-is in
            the query.
        collections: Collection to query (default ``DATABASE.stock_min``).

    Returns:
        The data in the requested format, or ``None`` for an unknown
        ``format``. If post-processing fails the frame becomes ``None``
        before conversion.
    """
    # Map the short aliases onto the canonical value stored in 'type'.
    for canonical, alias in (('1min', '1m'), ('5min', '5m'),
                             ('15min', '15m'), ('30min', '30m'),
                             ('60min', '60m')):
        if frequence in [canonical, alias]:
            frequence = canonical
            break
    else:
        print(
            "💢 Error QA_fetch_stock_min parameter frequence=%s is none of 1min 1m 5min 5m 15min 15m 30min 30m 60min 60m"
            % frequence)

    code = QA_util_code_tolist(code)
    cursor = collections.find({
        'code': {
            '$in': code
        },
        "time_stamp": {
            "$gte": QA_util_time_stamp(start),
            "$lte": QA_util_time_stamp(end)
        },
        'type': frequence
    })
    raw = pd.DataFrame(list(cursor))

    try:
        cleaned = raw.drop('_id', axis=1)
        cleaned = cleaned.assign(volume=raw.vol)
        cleaned = cleaned.query('volume>1')
        cleaned = cleaned.assign(datetime=pd.to_datetime(raw.datetime))
        cleaned = cleaned.drop_duplicates(['datetime', 'code'])
        res = cleaned.set_index('datetime', drop=False)
    except:
        # Any failure (for example an empty result) yields None.
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    if format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    if format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    if format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()

    print(
        "💢 Error QA_fetch_stock_min format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
        % format)
    return None
def QA_fetch_index_day(code,
                       start,
                       end,
                       format='numpy',
                       collections=DATABASE.index_day):
    """Fetch index daily bars.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        start: Start date; only the first 10 characters of ``str(start)``
            are used.
        end: End date, truncated the same way. Only ``end`` is validated.
        format: One of ``'P'/'p'/'pandas'/'pd'``, ``'json'/'dict'``,
            ``'n'/'N'/'numpy'`` or ``'list'/'l'/'L'``.
        collections: Collection to query (default ``DATABASE.index_day``).

    Returns:
        The data in the requested format; ``None`` for an invalid ``end``
        or an unknown ``format``. If post-processing fails the frame
        becomes ``None`` before conversion.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]
    code = QA_util_code_tolist(code)

    if not (QA_util_date_valid(end) == True):
        QA_util_log_info(
            'QA Error QA_fetch_index_day data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    date_range = {
        "$lte": QA_util_date_stamp(end),
        "$gte": QA_util_date_stamp(start)
    }
    cursor = collections.find(
        {'code': {'$in': code}, "date_stamp": date_range},
        {"_id": 0},
        batch_size=10000)
    raw = pd.DataFrame(list(cursor))

    try:
        # Expose 'vol' as 'volume', parse dates, de-duplicate and index.
        with_cols = raw.assign(volume=raw.vol, date=pd.to_datetime(raw.date))
        unique_rows = with_cols.drop_duplicates((['date', 'code']))
        res = unique_rows.set_index('date', drop=False)
    except:
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    if format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    if format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    if format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()

    print("QA Error QA_fetch_index_day format parameter %s is none of  \"P, p, pandas, pd , n, N, numpy !\" " % format)
    return None
def QA_fetch_ctp_tick(code,
                      start,
                      end,
                      frequence,
                      format='pd',
                      collections=DATABASE.ctp_tick):
    """Fetch stored CTP tick data as a DataFrame (intended for stored ticks only).

    Args:
        code: Instrument id or list of ids; normalised with
            ``QA_util_code_tolist(code, auto_fill=False)`` and matched
            against ``InstrumentID``.
        start: Start time, converted with ``QA_util_time_stamp``.
        end: End time, converted with ``QA_util_time_stamp``.
        frequence: Value matched against the ``type`` field.
        format: Not used by this function.
        collections: Collection to query (default ``DATABASE.ctp_tick``).

    Returns:
        A DataFrame of the selected tick columns plus ``datetime`` and
        ``code``, indexed by the parsed ``datetime``.
    """
    code = QA_util_code_tolist(code, auto_fill=False)
    query = {
        'InstrumentID': {'$in': code},
        "time_stamp": {
            "$gte": QA_util_time_stamp(start),
            "$lte": QA_util_time_stamp(end)
        },
        'type': frequence
    }
    cursor = collections.find(query, {"_id": 0}, batch_size=10000)

    # Treat the float-max sentinel and empty strings as missing, then drop
    # every column that contains a missing value.
    hq = pd.DataFrame(list(cursor))
    hq = hq.replace(1.7976931348623157e+308, numpy.nan)
    hq = hq.replace('', numpy.nan)
    hq = hq.dropna(axis=1)

    wanted_columns = [
        'ActionDay', 'AskPrice1', 'AskVolume1', 'AveragePrice', 'BidPrice1',
        'BidVolume1', 'HighestPrice', 'InstrumentID', 'LastPrice',
        'OpenInterest', 'TradingDay', 'UpdateMillisec', 'UpdateTime',
        'Volume'
    ]
    p1 = hq.loc[:, wanted_columns]

    # Build "<date> <time><fraction>": the fraction is UpdateMillisec /
    # 1000000 rendered with six decimals and its leading "0" stripped.
    day_text = p1.ActionDay.apply(QA_util_date_int2str)
    fraction_text = (p1.UpdateMillisec / 1000000).apply(
        lambda x: str('%.6f' % x)[1:])
    stamp_text = day_text + ' ' + p1.UpdateTime + fraction_text

    p1 = p1.assign(datetime=stamp_text, code=p1.InstrumentID)
    p1.datetime = pd.to_datetime(p1.datetime)
    return p1.set_index(p1.datetime)
def QA_fetch_future_day(code,
                        start,
                        end,
                        format='numpy',
                        collections=DATABASE.future_day):
    """Fetch daily futures bars.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist(code, auto_fill=False)``.
        start: Start date; only the first 10 characters of ``str(start)``
            are used.
        end: End date, truncated the same way. Only ``end`` is validated.
        format: ``'dict'/'json'`` returns the raw documents;
            ``'n'/'N'/'numpy'`` a numpy array; ``'list'/'l'/'L'`` the row
            list; ``'P'/'p'/'pandas'/'pd'`` a de-duplicated DataFrame
            indexed by ``date``. Any other value prints an error and
            returns the row list.
        collections: Collection to query (default ``DATABASE.future_day``).

    Returns:
        The data in the requested format, or ``None`` when ``end`` is not a
        valid date.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]
    code = QA_util_code_tolist(code, auto_fill=False)

    if not (QA_util_date_valid(end) == True):
        QA_util_log_info('QA something wrong with date')
        return None

    cursor = collections.find(
        {
            'code': {'$in': code},
            "date_stamp": {
                "$lte": QA_util_date_stamp(end),
                "$gte": QA_util_date_stamp(start)
            }
        },
        {"_id": 0},
        batch_size=10000)

    if format in ['dict', 'json']:
        return list(cursor)

    rows = [
        [
            str(doc['code']),
            float(doc['open']),
            float(doc['high']),
            float(doc['low']),
            float(doc['close']),
            float(doc['position']),
            float(doc['price']),
            float(doc['trade']),
            doc['date'],
        ]
        for doc in cursor
    ]

    if format in ['n', 'N', 'numpy']:
        return numpy.asarray(rows)
    if format in ['list', 'l', 'L']:
        return rows
    if format in ['P', 'p', 'pandas', 'pd']:
        frame = DataFrame(
            rows,
            columns=['code', 'open', 'high', 'low', 'close', 'position',
                     'price', 'trade', 'date']).drop_duplicates()
        frame['date'] = pd.to_datetime(frame['date'])
        return frame.set_index('date', drop=False)

    print("QA Error QA_fetch_future_day format parameter %s is none of  \"P, p, pandas, pd , n, N, numpy !\" " % format)
    return rows
def QA_fetch_stock_xdxr(code, format='pd', collections=DATABASE.stock_xdxr):
    """Fetch stock ex-rights / ex-dividend records from the database.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        format: Not used by this function.
        collections: Collection to query (default ``DATABASE.stock_xdxr``).

    Returns:
        A DataFrame without the ``_id`` column, with ``date`` parsed to
        datetime and also used as the index.
    """
    code = QA_util_code_tolist(code)
    cursor = collections.find({'code': {'$in': code}}, batch_size=10000)

    frame = pd.DataFrame(list(cursor))
    frame = frame.drop(['_id'], axis=1)
    frame['date'] = pd.to_datetime(frame['date'])
    return frame.set_index('date', drop=False)
def _QA_fetch_stock_adj(code,
                        start,
                        end,
                        format='pd',
                        collections=DATABASE.stock_adj):
    """Fetch stock adjustment (ADJ) factors.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        start: Start date; only the first 10 characters of ``str(start)``
            are used and compared against the stored ``date`` string.
        end: End date, truncated the same way. Only ``end`` is validated.
        format: Not used by this function.
        collections: Collection to query (default ``DATABASE.stock_adj``).

    Returns:
        A DataFrame indexed by the parsed ``date`` column, or ``None`` when
        ``end`` is not a valid date.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]
    code = QA_util_code_tolist(code)

    if not QA_util_date_valid(end):
        return None

    query = {
        'code': {
            '$in': code
        },
        "date": {
            "$lte": end,
            "$gte": start
        }
    }
    cursor = collections.find(query, {"_id": 0}, batch_size=10000)

    frame = pd.DataFrame(list(cursor))
    frame.date = pd.to_datetime(frame.date)
    return frame.set_index('date', drop=False)
def QA_SU_save_financial_report_day(code=None,
                                    client=DATABASE,
                                    ui_log=None,
                                    ui_progress=None):
    """Save the full history of WY financial reports into MongoDB.

    Args:
        code: A code or list of codes. When ``None`` the codes come from
            ``QA_fetch_financial_code()``; otherwise the value is
            normalised with ``QA_util_code_tolist``.
        client: Database whose ``stock_financial_wy`` collection is written
            (default ``DATABASE``).
        ui_log: Passed through to ``QA_util_log_info``.
        ui_progress: Passed through to ``QA_util_log_info``.

    Returns:
        None. Codes whose download or insert raised are collected and
        logged at the end.
    """
    err = []

    def _save_one(one_code, stock_financial):
        # Download and insert one code; any failure is recorded, not raised.
        try:
            QA_util_log_info(
                '##JOB01 Now Saving WY financial_report==== {}'.format(
                    str(one_code)), ui_log)
            stock_financial.insert_many(QA_util_to_json_from_pandas(
                QA_fetch_get_stock_report_wy(one_code)),
                                        ordered=False)
            gc.collect()
        except Exception as error0:
            print(error0)
            err.append(str(one_code))

    if code is None:
        code = QA_fetch_financial_code()
    else:
        code = QA_util_code_tolist(code)

    if code is None:
        QA_util_log_info(' No report send \n ', ui_log)
        return

    stock_financial = client.stock_financial_wy
    stock_financial.create_index([("code", pymongo.ASCENDING),
                                  ("report_date", pymongo.ASCENDING)],
                                 unique=True)

    total = len(code)
    # enumerate() replaces code.index(item): the lookup was O(n) per
    # iteration, reported the wrong position for duplicated codes, and
    # only worked when ``code`` was a list.
    for position, item in enumerate(code, start=1):
        QA_util_log_info('The {} of Total {}'.format(position, total))

        percent = float(position / total * 100)
        strProgressToLog = 'DOWNLOAD PROGRESS {}'.format(
            str(percent)[0:4] + '%')
        intProgressToLog = int(percent)
        QA_util_log_info(strProgressToLog,
                         ui_log=ui_log,
                         ui_progress=ui_progress,
                         ui_progress_int_value=intProgressToLog)

        _save_one(item, stock_financial)

    if len(err) < 1:
        QA_util_log_info('SUCCESS save WY financial_report ^_^', ui_log)
    else:
        QA_util_log_info(' ERROR CODE \n ', ui_log)
        QA_util_log_info(err, ui_log)
def QA_fetch_stock_technical_index(code,
                                   start,
                                   end=None,
                                   type='day',
                                   format='pd'):
    """Fetch stock technical-index rows from the day/week/month collection.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        start: Start date, converted with ``QA_util_date_stamp``.
        end: End date; validated with ``QA_util_date_valid`` and converted
            with ``QA_util_date_stamp``.
        type: ``'day'``, ``'week'`` or ``'month'``; selects the collection.
        format: One of ``'P'/'p'/'pandas'/'pd'``, ``'json'/'dict'``,
            ``'n'/'N'/'numpy'`` or ``'list'/'l'/'L'``.

    Returns:
        The data in the requested format. The DataFrame form is indexed by
        ``(date, code)`` with ``date`` as a 10-character string. ``None``
        is returned for an unknown ``type``, an invalid ``end`` or an
        unknown ``format``; if post-processing fails the frame becomes
        ``None`` before conversion.
    """
    if type == 'day':
        collections = DATABASE.stock_technical_index
    elif type == 'week':
        collections = DATABASE.stock_technical_week
    elif type == 'month':
        collections = DATABASE.stock_technical_month
    else:
        print("type should be in ['day', 'week', 'month']")
        # Previously execution continued and hit an UnboundLocalError on
        # ``collections``; stop here like the other error paths do.
        return None

    code = QA_util_code_tolist(code)

    if not QA_util_date_valid(end):
        QA_util_log_info(
            'QA Error QA_fetch_stock_technical_index data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    cursor = collections.find(
        {
            'code': {
                '$in': code
            },
            "date_stamp": {
                "$lte": QA_util_date_stamp(end),
                "$gte": QA_util_date_stamp(start)
            }
        },
        {"_id": 0},
        batch_size=10000)
    res = pd.DataFrame([item for item in cursor])

    try:
        res = res.drop_duplicates((['code', 'date']))
        # Keep only the YYYY-MM-DD part of the date.
        res['date'] = res['date'].apply(lambda x: str(x)[0:10])
        res = res.drop(['date_stamp'], axis=1).set_index(['date', 'code'])
    except Exception:
        # Best effort: an empty or malformed result yields None.
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        print(
            "QA Error QA_fetch_stock_technical_index format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
            % format)
        return None
def QA_fetch_index_min(code,
                       start,
                       end,
                       format='numpy',
                       frequence='1min',
                       collections=DATABASE.index_min):
    """Fetch minute bars from the index minute collection.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        start: Start time, converted with ``QA_util_time_stamp``.
        end: End time, converted with ``QA_util_time_stamp``.
        format: ``'dict'/'json'`` returns the raw documents;
            ``'numpy'/'np'/'n'`` a numpy array; ``'list'/'l'/'L'`` a nested
            list; ``'P'/'p'/'pandas'/'pd'`` a DataFrame indexed by
            ``datetime``.
        frequence: ``'1min'``/``'1m'`` ... ``'60min'``/``'60m'``; other
            values are used unchanged in the query.
        collections: Collection to query (default ``DATABASE.index_min``).

    Returns:
        The data in the requested format, or ``None`` for an unrecognised
        ``format``.
    """
    # Map the short aliases onto the canonical value stored in 'type'.
    for canonical, alias in (('1min', '1m'), ('5min', '5m'),
                             ('15min', '15m'), ('30min', '30m'),
                             ('60min', '60m')):
        if frequence in [canonical, alias]:
            frequence = canonical
            break

    code = QA_util_code_tolist(code)
    cursor = collections.find({
        'code': {
            '$in': code
        },
        "time_stamp": {
            "$gte": QA_util_time_stamp(start),
            "$lte": QA_util_time_stamp(end)
        },
        'type': frequence
    })

    if format in ['dict', 'json']:
        return list(cursor)

    rows = [
        [
            str(doc['code']),
            float(doc['open']),
            float(doc['high']),
            float(doc['low']),
            float(doc['close']),
            float(doc['vol']),
            doc['datetime'],
            doc['time_stamp'],
            doc['date'],
        ]
        for doc in cursor
    ]
    frame = DataFrame(rows, columns=[
        'code', 'open', 'high', 'low', 'close', 'volume', 'datetime',
        'time_stamp', 'date'
    ])
    frame['datetime'] = pd.to_datetime(frame['datetime'])
    frame = frame.set_index('datetime', drop=False)

    if format in ['numpy', 'np', 'n']:
        return numpy.asarray(frame)
    if format in ['list', 'l', 'L']:
        return numpy.asarray(frame).tolist()
    if format in ['P', 'p', 'pandas', 'pd']:
        return frame
    return None
def QA_fetch_stock_divyield(code,
                            start,
                            end=None,
                            format='pd',
                            collections=DATABASE.stock_divyield):
    """Fetch stock dividend records.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist`` and matched against ``a_stockcode``.
        start: Lower bound compared directly against ``dir_dcl_date``.
        end: Upper bound compared directly against ``dir_dcl_date``;
            validated with ``QA_util_date_valid``.
        format: One of ``'P'/'p'/'pandas'/'pd'``, ``'json'/'dict'``,
            ``'n'/'N'/'numpy'`` or ``'list'/'l'/'L'``.
        collections: Collection to query
            (default ``DATABASE.stock_divyield``).

    Returns:
        The data in the requested format, restricted to the dividend
        columns selected below. ``None`` is returned for an invalid ``end``
        or an unknown ``format``; if post-processing fails the frame
        becomes ``None`` before conversion.
    """
    code = QA_util_code_tolist(code)

    if not QA_util_date_valid(end):
        QA_util_log_info(
            'QA Error QA_fetch_stock_divyield data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    cursor = collections.find(
        {
            'a_stockcode': {
                '$in': code
            },
            "dir_dcl_date": {
                "$lte": end,
                "$gte": start
            }
        },
        batch_size=10000)
    res = pd.DataFrame([item for item in cursor])

    try:
        res = res.drop('_id', axis=1).drop_duplicates(
            (['dir_dcl_date', 'a_stockcode']))
        # ``.ix`` was removed in pandas 1.0; with it this block always hit
        # the except branch on modern pandas and returned None.
        res = res.loc[:, [
            'a_stockcode', 'a_stocksname', 'div_info', 'div_type_code',
            'bonus_shr', 'cash_bt', 'cap_shr', 'epsp', 'ps_cr', 'ps_up',
            'reg_date', 'dir_dcl_date', 'a_stockcode1', 'ex_divi_date', 'prg'
        ]]
    except Exception:
        # Best effort: an empty or malformed result yields None.
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        print(
            "QA Error QA_fetch_stock_divyield format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
            % format)
        return None
def QA_fetch_stock_info(code, format='pd', collections=DATABASE.stock_info):
    """Fetch stock info records as a DataFrame indexed by ``code``.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        format: Not used by this function.
        collections: Collection to query (default ``DATABASE.stock_info``).

    Returns:
        A DataFrame indexed by ``code`` (the column is kept), or ``None``
        when the query or indexing raises; the exception is logged.
    """
    code = QA_util_code_tolist(code)
    try:
        cursor = collections.find(
            {'code': {'$in': code}}, {"_id": 0}, batch_size=10000)
        frame = pd.DataFrame(list(cursor))
        return frame.set_index('code', drop=False)
    except Exception as e:
        QA_util_log_info(e)
        return None
def QA_fetch_stock_block(code=None,
                         format='pd',
                         collections=DATABASE.stock_block):
    """Fetch stock block (sector) membership records.

    Args:
        code: A code or list of codes (normalised with
            ``QA_util_code_tolist``); ``None`` fetches the whole
            collection.
        format: Not used by this function.
        collections: Collection to query (default ``DATABASE.stock_block``).

    Returns:
        A DataFrame without the ``_id`` column, indexed by ``code`` (the
        column is kept).
    """
    if code is None:
        cursor = collections.find()
    else:
        code = QA_util_code_tolist(code)
        cursor = collections.find({'code': {'$in': code}}, batch_size=10000)

    frame = pd.DataFrame(list(cursor)).drop(['_id'], axis=1)
    return frame.set_index('code', drop=False)
def QA_fetch_stock_financial_calendar(code,
                                      start,
                                      end=None,
                                      format='pd',
                                      collections=DATABASE.report_calendar):
    """Fetch financial-report calendar records.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        start: Lower bound compared directly against ``real_date``.
        end: Upper bound compared directly against ``real_date``; validated
            with ``QA_util_date_valid``.
        format: One of ``'P'/'p'/'pandas'/'pd'``, ``'json'/'dict'``,
            ``'n'/'N'/'numpy'`` or ``'list'/'l'/'L'``.
        collections: Collection to query
            (default ``DATABASE.report_calendar``).

    Returns:
        The data in the requested format, restricted to the calendar
        columns selected below. ``None`` is returned for an invalid ``end``
        or an unknown ``format``; if post-processing fails the frame
        becomes ``None`` before conversion.
    """
    code = QA_util_code_tolist(code)

    if not QA_util_date_valid(end):
        QA_util_log_info(
            'QA Error QA_fetch_stock_financial_calendar data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    cursor = collections.find(
        {
            'code': {
                '$in': code
            },
            "real_date": {
                "$lte": end,
                "$gte": start
            }
        },
        batch_size=10000)
    res = pd.DataFrame([item for item in cursor])

    try:
        res = res.drop('_id', axis=1).drop_duplicates(
            (['report_date', 'code']))
        # ``.ix`` was removed in pandas 1.0; with it this block always hit
        # the except branch on modern pandas and returned None.
        res = res.loc[:, [
            'code', 'name', 'pre_date', 'first_date', 'second_date',
            'third_date', 'real_date', 'codes', 'report_date', 'crawl_date'
        ]]
    except Exception:
        # Best effort: an empty or malformed result yields None.
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        print(
            "QA Error QA_fetch_stock_financial_calendar format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
            % format)
        return None
def QA_fetch_stock_min(code,
                       start,
                       end,
                       frequence='1min',
                       format='numpy',
                       collections=DATABASE.stock_min):
    """Fetch stock minute bars.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        start: Start date or datetime; validated with
            ``QA_util_dateordatetime_valid`` and converted with
            ``QA_util_time_stamp``.
        end: End date or datetime, handled the same way.
        frequence: ``'1min'``/``'1m'`` ... ``'60min'``/``'60m'``. An
            unrecognised value is logged and then used as-is in the query.
        format: Output format name forwarded to
            ``QA_util_to_anyformat_from_pandas``.
        collections: Collection to query (default ``DATABASE.stock_min``).

    Returns:
        The filtered result converted by ``QA_util_to_anyformat_from_pandas``,
        or ``None`` when either bound fails validation.
    """
    if not ((QA_util_dateordatetime_valid(start)) &
            (QA_util_dateordatetime_valid(end))):
        QA_util_log_info(
            'QA Error QA_fetch_stock_min data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    # Map the short aliases onto the canonical value stored in 'type'.
    if frequence in ['1min', '1m']:
        frequence = '1min'
    elif frequence in ['5min', '5m']:
        frequence = '5min'
    elif frequence in ['15min', '15m']:
        frequence = '15min'
    elif frequence in ['30min', '30m']:
        frequence = '30min'
    elif frequence in ['60min', '60m']:
        frequence = '60min'
    else:
        QA_util_log_info(
            "QA Error QA_fetch_stock_min parameter frequence=%s is none of 1min 1m 5min 5m 15min 15m 30min 30m 60min 60m"
            % frequence)

    code = QA_util_code_tolist(code)
    cursor = collections.find(
        {
            'code': {
                '$in': code
            },
            "time_stamp": {
                "$gte": QA_util_time_stamp(start),
                "$lte": QA_util_time_stamp(end)
            },
            'type': frequence
        },
        {"_id": 0},
        batch_size=10000)
    res = pd.DataFrame([item for item in cursor])

    # Data cleaning only: anomaly checks, index setup and column selection
    # are delegated to the shared filter helper.
    res = __QA_fetch_query_filter(
        res, DATA_QUERY_INDEX_COLUMNS_UNIQUE.STOCK_MIN, query='volume>1')
    # A leftover debug ``print(res)`` used to dump the entire frame to
    # stdout on every call; it has been removed.

    # Convert to the requested output format.
    return QA_util_to_anyformat_from_pandas(data=res, format=format)
def QA_fetch_stock_xdxr(code, format='pd', collections=DATABASE.stock_xdxr):
    """Fetch stock ex-rights / ex-dividend records from the database.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        format: Not used by this function.
        collections: Collection to query (default ``DATABASE.stock_xdxr``).

    Returns:
        Whatever ``__QA_fetch_query_filter`` returns for the fetched frame
        (``_id`` column dropped) under the ``STOCK_XDXR`` configuration.
    """
    code = QA_util_code_tolist(code)
    cursor = collections.find({'code': {'$in': code}}, batch_size=10000)
    frame = pd.DataFrame(list(cursor)).drop(['_id'], axis=1)
    return __QA_fetch_query_filter(
        frame, DATA_QUERY_INDEX_COLUMNS_UNIQUE.STOCK_XDXR, query=None)
def QA_fetch_stock_day(code,
                       start,
                       end,
                       format='numpy',
                       frequence='day',
                       collections=DATABASE.stock_day):
    """Fetch stock daily bars.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        start: Start date; only the first 10 characters of ``str(start)``
            are used.
        end: End date, truncated the same way. Only ``end`` is validated.
        format: One of ``'P'/'p'/'pandas'/'pd'``, ``'json'/'dict'``,
            ``'n'/'N'/'numpy'`` or ``'list'/'l'/'L'``.
        frequence: Accepted for signature compatibility; not used here.
        collections: Collection to query (default ``DATABASE.stock_day``).

    Returns:
        The data in the requested format. The DataFrame form is indexed by
        ``date`` and restricted to code/open/high/low/close/volume/amount/
        date. ``None`` is returned for an invalid ``end`` or an unknown
        ``format``; if post-processing fails the frame becomes ``None``
        before conversion.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]
    code = QA_util_code_tolist(code)

    if not (QA_util_date_valid(end) == True):
        QA_util_log_info('something wrong with date')
        return None

    cursor = collections.find({
        'code': {
            '$in': code
        },
        "date_stamp": {
            "$lte": QA_util_date_stamp(end),
            "$gte": QA_util_date_stamp(start)
        }
    })
    res = pd.DataFrame([item for item in cursor])

    try:
        res = res.drop(
            '_id', axis=1).assign(volume=res.vol).query('volume>1').assign(
                date=pd.to_datetime(res.date)).drop_duplicates(
                    (['date', 'code'])).set_index('date', drop=False)
        # ``.ix`` was removed in pandas 1.0; with it this block always hit
        # the except branch on modern pandas and returned None.
        res = res.loc[:, [
            'code', 'open', 'high', 'low', 'close', 'volume', 'amount',
            'date'
        ]]
    except Exception:
        # Best effort: an empty or malformed result yields None.
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        return None
def QA_fetch_index_transaction(code,
                               start,
                               end,
                               format='numpy',
                               frequence='tick',
                               collections=DATABASE.index_transaction):
    """Fetch tick/transaction rows from the index transaction collection.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        start: Start time, converted with ``QA_util_time_stamp``.
        end: End time, converted with ``QA_util_time_stamp``.
        format: One of ``'P'/'p'/'pandas'/'pd'``, ``'json'/'dict'``,
            ``'n'/'N'/'numpy'`` or ``'list'/'l'/'L'``.
        frequence: ``'tick'``, ``'TICK'`` or ``'transaction'``. Any other
            value prints an error and is then used as-is in the query.
        collections: Collection to query
            (default ``DATABASE.index_transaction``).

    Returns:
        The data in the requested format, or ``None`` for an unknown
        ``format``. If post-processing fails the frame becomes ``None``
        before conversion.
    """
    if frequence not in ['tick', 'TICK', 'transaction']:
        print(
            "QA Error QA_fetch_index_transaction parameter frequence=%s is none of tick Tick transaction"
            % frequence)
    else:
        frequence = 'tick'

    code = QA_util_code_tolist(code)
    query = {
        'code': {
            '$in': code
        },
        "time_stamp": {
            "$gte": QA_util_time_stamp(start),
            "$lte": QA_util_time_stamp(end)
        },
        'type': frequence
    }
    cursor = collections.find(query, {"_id": 0}, batch_size=10000)
    raw = pd.DataFrame(list(cursor))

    try:
        # Expose 'vol' as 'volume', parse timestamps, keep rows with
        # volume above 1, de-duplicate and index by datetime.
        enriched = raw.assign(volume=raw.vol,
                              datetime=pd.to_datetime(raw.datetime))
        traded = enriched.query('volume>1')
        unique_rows = traded.drop_duplicates(['datetime', 'code'])
        res = unique_rows.set_index('datetime', drop=False)
    except:
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    if format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    if format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    if format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()

    print(
        "QA Error QA_fetch_index_transaction format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
        % format)
    return None
def QA_fetch_stock_pure_tech_indicator(
        code,
        start,
        end,
        vol='non-zero',
        keys='all',
        format='pd',
        collections=DATABASE.stock_tech_indicator_3):
    """Fetch technical-indicator rows for stocks between two dates.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        start: Start date; only the first 10 characters of ``str(start)``
            are used.
        end: End date, truncated the same way. Only ``end`` is validated.
        vol: ``'non-zero'`` adds a ``volume > 0`` condition to the query;
            any other value fetches every row in the range.
        keys: ``'all'`` keeps every column; a string keeps that single
            column; a list keeps the listed columns.
        format: One of ``'P'/'p'/'pandas'/'pd'``, ``'json'/'dict'``,
            ``'n'/'N'/'numpy'`` or ``'list'/'l'/'L'``.
        collections: Collection to query
            (default ``DATABASE.stock_tech_indicator_3``).

    Returns:
        The data in the requested format; the DataFrame form is indexed by
        the parsed ``date``. ``None`` is returned for an invalid ``end`` or
        an unknown ``format``; if post-processing fails the frame becomes
        ``None`` before conversion.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]
    code = QA_util_code_tolist(code)

    if not QA_util_date_valid(end):
        QA_util_log_info(
            'QA Error QA_fetch_tech_indicator data parameter start=%s end=%s is not right' % (start, end))
        return None

    if vol == 'non-zero':
        cursor = collections.find({
            'code': {'$in': code},
            "date_stamp": {
                "$lte": QA_util_date_stamp(end),
                "$gte": QA_util_date_stamp(start)},
            'volume': {"$gt": 0}})
    else:
        cursor = collections.find({
            'code': {'$in': code},
            "date_stamp": {
                "$lte": QA_util_date_stamp(end),
                "$gte": QA_util_date_stamp(start)}})
    res = pd.DataFrame([item for item in cursor])

    try:
        # Drop '_id', parse 'date' to datetime and use it as the index.
        res = res.drop('_id', axis=1).assign(
            date=pd.to_datetime(res.date)).drop_duplicates(
                (['date', 'code'])).set_index('date', drop=False)
        if keys != 'all':
            # ``.ix`` was removed in pandas 1.0; with it any explicit
            # ``keys`` selection fell into the except branch and returned
            # None on modern pandas.
            if isinstance(keys, str):
                res = res.loc[:, [keys]]
            elif isinstance(keys, list):
                res = res.loc[:, keys]
    except Exception:
        # Best effort: an empty or malformed result yields None.
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        print("QA Error QA_fetch_tech_indicator format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" " % format)
        return None
def QA_fetch_stock_fianacial(code,
                             start,
                             end=None,
                             format='pd',
                             collections=DATABASE.stock_financial_analysis):
    """Fetch quant base data from the financial-analysis collection.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist`` and matched against ``CODE``.
        start: Start date, converted with ``QA_util_date_stamp``.
        end: End date; validated with ``QA_util_date_valid`` and converted
            with ``QA_util_date_stamp``.
        format: One of ``'P'/'p'/'pandas'/'pd'``, ``'json'/'dict'``,
            ``'n'/'N'/'numpy'`` or ``'list'/'l'/'L'``.
        collections: Collection to query
            (default ``DATABASE.stock_financial_analysis``).

    Returns:
        The data in the requested format, with ``CODE`` renamed to
        ``code``, ``date_stamp``/``_id`` dropped and a derived ``RNG_RES``
        column added. ``None`` is returned for an invalid ``end`` or an
        unknown ``format``; if post-processing fails the frame becomes
        ``None`` before conversion.
    """
    code = QA_util_code_tolist(code)

    if not QA_util_date_valid(end):
        # The message used to name QA_fetch_financial_TTM (copy-paste),
        # which pointed readers of the log at the wrong function.
        QA_util_log_info(
            'QA Error QA_fetch_stock_fianacial data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    cursor = collections.find(
        {
            'CODE': {
                '$in': code
            },
            "date_stamp": {
                "$lte": QA_util_date_stamp(end),
                "$gte": QA_util_date_stamp(start)
            }
        },
        batch_size=10000)
    res = pd.DataFrame([item for item in cursor])

    try:
        # Only the 'CODE' column is lower-cased; other names are kept.
        res.columns = [
            i.lower() if i == 'CODE' else i for i in list(res.columns)
        ]
        res = res.drop(['date_stamp', '_id'], axis=1).drop_duplicates(
            (['code', 'date']))
        res['RNG_RES'] = res['AVG60_RNG'] * 60 / res['RNG_60']
    except Exception:
        # Best effort: an empty or malformed result yields None.
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        print(
            "QA Error QA_fetch_stock_fianacial format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
            % format)
        return None
def QA_fetch_financial_TTM(code,
                           start,
                           end=None,
                           format='pd',
                           collections=DATABASE.financial_TTM):
    """Fetch financial-report TTM data.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist`` and matched against ``CODE``.
        start: Start date, converted with ``QA_util_date_stamp`` and
            compared against the stored ``date`` field.
        end: End date; validated with ``QA_util_date_valid`` and converted
            the same way.
        format: One of ``'P'/'p'/'pandas'/'pd'``, ``'json'/'dict'``,
            ``'n'/'N'/'numpy'`` or ``'list'/'l'/'L'``.
        collections: Collection to query
            (default ``DATABASE.financial_TTM``).

    Returns:
        The data in the requested format; ``None`` for an invalid ``end``
        or an unknown ``format``. If post-processing fails the frame
        becomes ``None`` before conversion.
    """
    code = QA_util_code_tolist(code)

    if not QA_util_date_valid(end):
        QA_util_log_info(
            'QA Error QA_fetch_financial_TTM data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    date_range = {
        "$lte": QA_util_date_stamp(end),
        "$gte": QA_util_date_stamp(start)
    }
    cursor = collections.find(
        {'CODE': {'$in': code}, "date": date_range}, batch_size=10000)
    raw = pd.DataFrame(list(cursor))

    try:
        without_id = raw.drop('_id', axis=1)
        res = without_id.drop_duplicates((['REPORT_DATE', 'CODE']))
    except:
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    if format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    if format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    if format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()

    print(
        "QA Error QA_fetch_financial_TTM format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
        % format)
    return None
def QA_fetch_stock_day(code,
                       start,
                       end,
                       format='numpy',
                       frequence='day',
                       collections=DATABASE.stock_day):
    """Fetch stock daily bars.

    The query uses a ``{"_id": 0}`` projection so MongoDB does not return
    the ``_id`` field; see
    https://docs.mongodb.com/manual/tutorial/project-fields-from-query-results/#return-the-specified-fields-and-the-id-field-only

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        start: Start date; only the first 10 characters of ``str(start)``
            are used.
        end: End date, truncated the same way. Only ``end`` is validated.
        format: One of ``'P'/'p'/'pandas'/'pd'``, ``'json'/'dict'``,
            ``'n'/'N'/'numpy'`` or ``'list'/'l'/'L'``.
        frequence: Accepted for signature compatibility; not used here.
        collections: Collection to query (default ``DATABASE.stock_day``).

    Returns:
        The data in the requested format. The DataFrame form is indexed by
        ``date`` and restricted to code/open/high/low/close/volume/amount/
        date. ``None`` is returned for an invalid ``end`` or an unknown
        ``format``; if post-processing fails the frame becomes ``None``
        before conversion.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]
    code = QA_util_code_tolist(code)

    if not QA_util_date_valid(end):
        QA_util_log_info(
            'QA Error QA_fetch_stock_day data parameter start=%s end=%s is not right' % (start, end))
        return None

    cursor = collections.find({
        'code': {'$in': code},
        "date_stamp": {
            "$lte": QA_util_date_stamp(end),
            "$gte": QA_util_date_stamp(start)}},
        {"_id": 0},
        batch_size=10000)
    res = pd.DataFrame([item for item in cursor])

    try:
        res = res.assign(
            volume=res.vol, date=pd.to_datetime(res.date)).drop_duplicates(
                (['date', 'code'])).query('volume>1').set_index(
                    'date', drop=False)
        # ``.ix`` was removed in pandas 1.0; with it this block always hit
        # the except branch on modern pandas and returned None.
        res = res.loc[:, ['code', 'open', 'high', 'low',
                          'close', 'volume', 'amount', 'date']]
    except Exception:
        # Best effort: an empty or malformed result yields None.
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        print("QA Error QA_fetch_stock_day format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" " % format)
        return None
def QA_fetch_stock_info(code, format='pd', collections=DATABASE.stock_info):
    """Fetch stock info records and run them through the shared filter.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        format: Not used by this function.
        collections: Collection to query (default ``DATABASE.stock_info``).

    Returns:
        Whatever ``__QA_fetch_query_filter`` returns under the
        ``STOCK_INFO`` configuration, or ``None`` when the query or the
        filter raises; the exception is logged.
    """
    code = QA_util_code_tolist(code)
    try:
        cursor = collections.find(
            {'code': {'$in': code}}, {"_id": 0}, batch_size=10000)
        frame = pd.DataFrame(list(cursor))
        return __QA_fetch_query_filter(
            frame, DATA_QUERY_INDEX_COLUMNS_UNIQUE.STOCK_INFO, query=None)
    except Exception as e:
        QA_util_log_info(e)
        return None
def QA_fetch_stock_day(code,
                       start,
                       end,
                       format='numpy',
                       frequence='day',
                       collections=DATABASE.stock_day):
    """Fetch stock daily bars.

    The query uses a ``{"_id": 0}`` projection so MongoDB does not return
    the ``_id`` field; see
    https://docs.mongodb.com/manual/tutorial/project-fields-from-query-results/#return-the-specified-fields-and-the-id-field-only

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        start: Start date or datetime; validated as given, then truncated
            to the first 10 characters of its string form for the query.
        end: End date or datetime, handled the same way.
        format: Output format name forwarded to
            ``QA_util_to_anyformat_from_pandas``.
        frequence: Accepted for signature compatibility; not used here.
        collections: Collection to query (default ``DATABASE.stock_day``).

    Returns:
        The filtered result converted by ``QA_util_to_anyformat_from_pandas``,
        or ``None`` when either bound fails validation.
    """
    start_ok = QA_util_dateordatetime_valid(start)
    end_ok = QA_util_dateordatetime_valid(end)
    if not (start_ok & end_ok):
        QA_util_log_info(
            'QA Error QA_fetch_stock_day data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    # Fetch: only the date part of each bound is used.
    start_date = str(start)[0:10]
    end_date = str(end)[0:10]
    code = QA_util_code_tolist(code)

    date_range = {
        "$lte": QA_util_date_stamp(end_date),
        "$gte": QA_util_date_stamp(start_date)
    }
    cursor = collections.find(
        {'code': {'$in': code}, "date_stamp": date_range},
        {"_id": 0},
        batch_size=10000)
    frame = pd.DataFrame(list(cursor))

    # Data cleaning only: anomaly checks, index setup and column selection
    # are delegated to the shared filter helper.
    frame = __QA_fetch_query_filter(
        frame, DATA_QUERY_INDEX_COLUMNS_UNIQUE.STOCK_DAY, query='volume>1')

    # Convert to the requested output format.
    return QA_util_to_anyformat_from_pandas(data=frame, format=format)
def QA_fetch_future_min(code,
                        start,
                        end,
                        frequence='1min',
                        format='numpy',
                        collections=DATABASE.future_min):
    """Fetch futures minute bars.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist(code, auto_fill=False)``.
        start: Start time; the first 19 characters of ``str(start)`` are
            used.
        end: End time, truncated the same way.
        frequence: ``'1min'``/``'1m'`` ... ``'60min'``/``'60m'``; other
            values are used unchanged in the query.
        format: Output format name forwarded to
            ``QA_util_to_anyformat_from_pandas``.
        collections: Collection to query (default ``DATABASE.future_min``).

    Returns:
        The filtered result converted by ``QA_util_to_anyformat_from_pandas``,
        or ``None`` when either bound fails validation.
    """
    # Map the short aliases onto the canonical value stored in 'type'.
    for canonical, alias in (('1min', '1m'), ('5min', '5m'),
                             ('15min', '15m'), ('30min', '30m'),
                             ('60min', '60m')):
        if frequence in [canonical, alias]:
            frequence = canonical
            break

    start = str(start)[0:19]
    end = str(end)[0:19]
    code = QA_util_code_tolist(code, auto_fill=False)

    start_ok = QA_util_dateordatetime_valid(start)
    end_ok = QA_util_dateordatetime_valid(end)
    if not (start_ok & end_ok):
        QA_util_log_info(
            'QA Error QA_fetch_future_min data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    query = {
        'code': {
            '$in': code
        },
        "time_stamp": {
            "$gte": QA_util_time_stamp(start),
            "$lte": QA_util_time_stamp(end)
        },
        'type': frequence
    }
    cursor = collections.find(query, batch_size=10000)
    frame = pd.DataFrame(list(cursor))

    # Data cleaning only: anomaly checks, index setup and column selection
    # are delegated to the shared filter helper.
    frame = __QA_fetch_query_filter(
        frame, DATA_QUERY_INDEX_COLUMNS_UNIQUE.FUTURE_MIN, query=None)

    # Convert to the requested output format.
    return QA_util_to_anyformat_from_pandas(data=frame, format=format)
def getMin(cls, code, start, end, if_fq='00', frequence=8):
    """Fetch stock minute bars from ``cls.collectionsMin``.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist``.
        start: Start time, converted with ``QA_util_time_stamp``.
        end: End time, converted with ``QA_util_time_stamp``.
        if_fq: Not used by this method.
        frequence: Passed to ``cls.getReverseFrequence``; the second item
            of its three-item result is matched against the ``type`` field.

    Returns:
        The data in the format named by ``cls.format``, or ``None`` when
        that format is not recognised. If post-processing fails the frame
        becomes ``None`` before conversion.
    """
    collections = cls.collectionsMin
    _first, bar_type, _third = cls.getReverseFrequence(frequence)

    code = QA_util_code_tolist(code)
    query = {
        'code': {
            '$in': code
        },
        "time_stamp": {
            "$gte": QA_util_time_stamp(start),
            "$lte": QA_util_time_stamp(end)
        },
        'type': bar_type
    }
    cursor = collections.find(query, {"_id": 0}, batch_size=10000)
    raw = pd.DataFrame(list(cursor))

    try:
        # Expose 'vol' as 'volume', parse timestamps, keep rows with
        # volume above 1, de-duplicate and index by datetime.
        enriched = raw.assign(volume=raw.vol,
                              datetime=pd.to_datetime(raw.datetime))
        traded = enriched.query('volume>1')
        unique_rows = traded.drop_duplicates(['datetime', 'code'])
        res = unique_rows.set_index('datetime', drop=False)
    except:
        res = None

    if cls.format in ['P', 'p', 'pandas', 'pd']:
        return res
    if cls.format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    if cls.format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    if cls.format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()

    print(
        "QA Error QA_fetch_stock_min format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
        % cls.format)
    return None
def QA_fetch_future_min(code,
                        start,
                        end,
                        format='numpy',
                        frequence='1min',
                        collections=DATABASE.future_min):
    """Fetch futures minute bars.

    Args:
        code: A code or list of codes; normalised with
            ``QA_util_code_tolist(code, auto_fill=False)``.
        start: Start time, converted with ``QA_util_time_stamp``.
        end: End time, converted with ``QA_util_time_stamp``.
        format: ``'dict'/'json'`` returns the raw documents;
            ``'numpy'/'np'/'n'`` a numpy array; ``'list'/'l'/'L'`` a nested
            list; ``'P'/'p'/'pandas'/'pd'`` a DataFrame indexed by
            ``datetime``.
        frequence: ``'1min'``/``'1m'`` ... ``'60min'``/``'60m'``; other
            values are used unchanged in the query.
        collections: Collection to query (default ``DATABASE.future_min``).

    Returns:
        The data in the requested format, or ``None`` for an unrecognised
        ``format``.
    """
    # Map the short aliases onto the canonical value stored in 'type'.
    for canonical, alias in (('1min', '1m'), ('5min', '5m'),
                             ('15min', '15m'), ('30min', '30m'),
                             ('60min', '60m')):
        if frequence in [canonical, alias]:
            frequence = canonical
            break

    code = QA_util_code_tolist(code, auto_fill=False)
    cursor = collections.find({
        'code': {'$in': code},
        "time_stamp": {
            "$gte": QA_util_time_stamp(start),
            "$lte": QA_util_time_stamp(end)
        },
        'type': frequence
    }, batch_size=10000)

    if format in ['dict', 'json']:
        return list(cursor)

    # 'position' and 'price' default to 0; 'trade' falls back to 'volume'
    # and then to 0 when absent from a document.
    rows = [
        [
            str(doc['code']),
            float(doc['open']),
            float(doc['high']),
            float(doc['low']),
            float(doc['close']),
            float(doc.get('position', 0)),
            float(doc.get('price', 0)),
            float(doc.get('trade', doc.get('volume', 0))),
            doc['datetime'],
            doc['tradetime'],
            doc['time_stamp'],
            doc['date'],
            doc['type'],
        ]
        for doc in cursor
    ]
    frame = DataFrame(rows, columns=[
        'code', 'open', 'high', 'low', 'close', 'position', 'price',
        'trade', 'datetime', 'tradetime', 'time_stamp', 'date', 'type'])
    frame = frame.assign(datetime=pd.to_datetime(frame['datetime']))
    frame = frame.drop_duplicates((['datetime', 'code']))
    frame = frame.set_index('datetime', drop=False)

    if format in ['numpy', 'np', 'n']:
        return numpy.asarray(frame)
    if format in ['list', 'l', 'L']:
        return numpy.asarray(frame).tolist()
    if format in ['P', 'p', 'pandas', 'pd']:
        return frame
    return None