def QA_fetch_future_day(code,
                        start,
                        end,
                        format='numpy',
                        collections=DATABASE.future_day):
    """Fetch daily futures bars for one or more contracts.

    Args:
        code: a contract code or a list of codes; normalised with
            ``QA_util_code_tolist(code, auto_fill=False)``.
        start: range start; only the first 10 characters of ``str(start)``
            are kept (presumably a ``YYYY-MM-DD`` date -- confirm).
        end: range end, truncated the same way as ``start``.
        format: output format name forwarded to
            ``QA_util_to_anyformat_from_pandas``.
        collections: collection to query (defaults to ``DATABASE.future_day``).

    Returns:
        The result of ``QA_util_to_anyformat_from_pandas`` for the filtered
        frame, or ``None`` when either date fails validation.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]
    code = QA_util_code_tolist(code, auto_fill=False)

    start_ok = QA_util_dateordatetime_valid(start)
    end_ok = QA_util_dateordatetime_valid(end)
    if not (start_ok & end_ok):
        QA_util_log_info(
            'QA Error QA_fetch_future_day data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    # Inclusive range on the numeric date stamp; "_id" is projected away.
    stamp_range = {
        "$lte": QA_util_date_stamp(end),
        "$gte": QA_util_date_stamp(start)
    }
    selector = {'code': {'$in': code}, "date_stamp": stamp_range}
    documents = collections.find(selector, {"_id": 0}, batch_size=10000)
    frame = pd.DataFrame(list(documents))

    # Data cleaning only: anomaly checks, index and column selection are
    # delegated to the shared filter helper; the layout is not changed here.
    frame = __QA_fetch_query_filter(
        frame, DATA_QUERY_INDEX_COLUMNS_UNIQUE.FUTURE_DAY, query=None)

    # Convert to the requested output format.
    return QA_util_to_anyformat_from_pandas(data=frame, format=format)
def QA_fetch_stock_divyield(code,
                            start,
                            end=None,
                            format='pd',
                            collections=DATABASE.stock_divyield):
    """Fetch dividend records for the given stock codes.

    Documents are selected where ``a_stockcode`` is one of ``code`` and
    ``dir_dcl_date`` lies between ``start`` and ``end`` (both inclusive).
    ``start`` and ``end`` are passed to the query unchanged.

    Args:
        code: a stock code or list of codes (normalised by
            ``QA_util_code_tolist``).
        start: lower bound for ``dir_dcl_date``.
        end: upper bound for ``dir_dcl_date``.
            NOTE(review): the default ``None`` is still run through
            ``QA_util_dateordatetime_valid``; confirm the validator accepts
            it, otherwise callers must always pass ``end``.
        format: one of 'P'/'p'/'pandas'/'pd', 'json'/'dict',
            'n'/'N'/'numpy' or 'list'/'l'/'L'.
        collections: collection to query (defaults to
            ``DATABASE.stock_divyield``).

    Returns:
        The records converted to ``format``. ``None`` is returned when the
        dates are invalid or ``format`` is unknown. When the query yields no
        usable rows the intermediate frame is ``None`` and is converted as
        such (so 'pd' returns ``None``).
    """
    wanted_columns = [
        'a_stockcode', 'a_stocksname', 'div_info', 'div_type_code',
        'bonus_shr', 'cash_bt', 'cap_shr', 'epsp', 'ps_cr', 'ps_up',
        'reg_date', 'dir_dcl_date', 'a_stockcode1', 'ex_divi_date', 'prg'
    ]

    code = QA_util_code_tolist(code)
    if not (QA_util_dateordatetime_valid(start)
            and QA_util_dateordatetime_valid(end)):
        QA_util_log_info(
            'QA Error QA_fetch_stock_divyield data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    cursor = collections.find(
        {
            'a_stockcode': {'$in': code},
            "dir_dcl_date": {"$lte": end, "$gte": start}
        },
        {"_id": 0},
        batch_size=10000)
    res = pd.DataFrame(list(cursor))

    if res.empty:
        # Nothing matched: keep the historical "no data -> None" contract
        # without relying on an exception to get there.
        res = None
    else:
        try:
            res = res.drop_duplicates(subset=['dir_dcl_date', 'a_stockcode'])
            # ``DataFrame.ix`` was removed in pandas 1.0; ``reindex`` keeps
            # the old behaviour of returning the columns in this order and
            # filling any column missing from the data with NaN.
            res = res.reindex(columns=wanted_columns)
        except Exception as exc:
            # Best effort, as before, but no longer silent and no longer
            # catching KeyboardInterrupt/SystemExit.
            QA_util_log_info(
                'QA Error QA_fetch_stock_divyield failed to clean the query result: %s'
                % exc)
            res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        print(
            "QA Error QA_fetch_stock_divyield format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
            % format)
        return None
def QA_fetch_stock_min(code,
                       start,
                       end,
                       frequence='1min',
                       format='numpy',
                       collections=DATABASE.stock_min):
    """Fetch stock minute bars for one or more codes.

    Args:
        code: a stock code or list of codes (normalised by
            ``QA_util_code_tolist``).
        start: range start, converted with ``QA_util_time_stamp``.
        end: range end, converted with ``QA_util_time_stamp``.
        frequence: bar size; '1min'/'1m', '5min'/'5m', '15min'/'15m',
            '30min'/'30m' or '60min'/'60m'. Any other value is reported via
            ``QA_util_log_info`` and then used unchanged in the query.
        format: output format name forwarded to
            ``QA_util_to_anyformat_from_pandas``.
        collections: collection to query (defaults to ``DATABASE.stock_min``).

    Returns:
        The result of ``QA_util_to_anyformat_from_pandas`` for the filtered
        frame, or ``None`` when either date fails validation.
    """
    if not (QA_util_dateordatetime_valid(start)
            and QA_util_dateordatetime_valid(end)):
        QA_util_log_info(
            'QA Error QA_fetch_stock_min data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    # Map the short alias ('5m') onto the canonical name ('5min').
    for canonical in ('1min', '5min', '15min', '30min', '60min'):
        if frequence in (canonical, canonical[:-2]):
            frequence = canonical
            break
    else:
        # Unrecognised values are only reported; the query still runs with
        # the caller's value, exactly as it always has.
        QA_util_log_info(
            "QA Error QA_fetch_stock_min parameter frequence=%s is none of 1min 1m 5min 5m 15min 15m 30min 30m 60min 60m"
            % frequence)

    code = QA_util_code_tolist(code)
    cursor = collections.find(
        {
            'code': {'$in': code},
            "time_stamp": {
                "$gte": QA_util_time_stamp(start),
                "$lte": QA_util_time_stamp(end)
            },
            'type': frequence
        },
        {"_id": 0},
        batch_size=10000)
    res = pd.DataFrame(list(cursor))

    # Data cleaning only (anomaly checks, index, column selection); rows are
    # additionally restricted by the 'volume>1' query.
    res = __QA_fetch_query_filter(
        res, DATA_QUERY_INDEX_COLUMNS_UNIQUE.STOCK_MIN, query='volume>1')

    # The stray debug ``print(res)`` that used to sit here was removed: a
    # library fetch function should not dump whole frames to stdout.
    return QA_util_to_anyformat_from_pandas(data=res, format=format)
def QA_fetch_stock_financial_calendar(code,
                                      start,
                                      end=None,
                                      format='pd',
                                      collections=DATABASE.report_calendar):
    """Fetch financial-report calendar entries for the given stock codes.

    Documents are selected where ``code`` is one of the requested codes and
    ``real_date`` lies between ``start`` and ``end`` (both inclusive).
    ``start`` and ``end`` are passed to the query unchanged. Rows are
    de-duplicated on ``(report_date, code)``.

    Args:
        code: a stock code or list of codes (normalised by
            ``QA_util_code_tolist``).
        start: lower bound for ``real_date``.
        end: upper bound for ``real_date``.
            NOTE(review): the default ``None`` is still run through
            ``QA_util_dateordatetime_valid``; confirm the validator accepts
            it, otherwise callers must always pass ``end``.
        format: one of 'P'/'p'/'pandas'/'pd', 'json'/'dict',
            'n'/'N'/'numpy' or 'list'/'l'/'L'.
        collections: collection to query (defaults to
            ``DATABASE.report_calendar``).

    Returns:
        The entries converted to ``format``. ``None`` is returned when the
        dates are invalid or ``format`` is unknown. When the query yields no
        usable rows the intermediate frame is ``None`` and is converted as
        such (so 'pd' returns ``None``).
    """
    wanted_columns = [
        'code', 'name', 'pre_date', 'first_date', 'second_date',
        'third_date', 'real_date', 'codes', 'report_date', 'crawl_date'
    ]

    code = QA_util_code_tolist(code)
    if not (QA_util_dateordatetime_valid(start)
            and QA_util_dateordatetime_valid(end)):
        QA_util_log_info(
            'QA Error QA_fetch_stock_financial_calendar data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    cursor = collections.find(
        {
            'code': {'$in': code},
            "real_date": {"$lte": end, "$gte": start}
        },
        {"_id": 0},
        batch_size=10000)
    res = pd.DataFrame(list(cursor))

    if res.empty:
        # Nothing matched: keep the historical "no data -> None" contract
        # without relying on an exception to get there.
        res = None
    else:
        try:
            res = res.drop_duplicates(subset=['report_date', 'code'])
            # ``DataFrame.ix`` was removed in pandas 1.0; ``reindex`` keeps
            # the old behaviour of returning the columns in this order and
            # filling any column missing from the data with NaN.
            res = res.reindex(columns=wanted_columns)
        except Exception as exc:
            # Best effort, as before, but no longer silent and no longer
            # catching KeyboardInterrupt/SystemExit.
            QA_util_log_info(
                'QA Error QA_fetch_stock_financial_calendar failed to clean the query result: %s'
                % exc)
            res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        print(
            "QA Error QA_fetch_stock_financial_calendar format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
            % format)
        return None
def QA_fetch_index_day(code,
                       start,
                       end,
                       format='numpy',
                       collections=DATABASE.index_day):
    """Fetch daily index bars for one or more index codes.

    The result gains a ``volume`` column copied from ``vol``, has ``date``
    parsed with ``pd.to_datetime``, is de-duplicated on ``(date, code)`` and
    is indexed by ``date`` (the column is kept as well).

    Args:
        code: an index code or list of codes (normalised by
            ``QA_util_code_tolist``).
        start: range start; only the first 10 characters of ``str(start)``
            are kept.
        end: range end, truncated the same way as ``start``.
        format: one of 'P'/'p'/'pandas'/'pd', 'json'/'dict',
            'n'/'N'/'numpy' or 'list'/'l'/'L'.
        collections: collection to query (defaults to ``DATABASE.index_day``).

    Returns:
        The bars converted to ``format``. ``None`` is returned when the dates
        are invalid or ``format`` is unknown. When the query yields no usable
        rows the intermediate frame is ``None`` and is converted as such (so
        'pd' returns ``None``).
    """
    start = str(start)[0:10]
    end = str(end)[0:10]
    code = QA_util_code_tolist(code)

    if not (QA_util_dateordatetime_valid(start)
            and QA_util_dateordatetime_valid(end)):
        QA_util_log_info(
            'QA Error QA_fetch_index_day data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    cursor = collections.find(
        {
            'code': {'$in': code},
            "date_stamp": {
                "$lte": QA_util_date_stamp(end),
                "$gte": QA_util_date_stamp(start)
            }
        },
        {"_id": 0},
        batch_size=10000)
    res = pd.DataFrame(list(cursor))

    if res.empty:
        # No rows: previously reached only via an AttributeError swallowed by
        # a bare ``except``; handle it explicitly instead.
        res = None
    else:
        try:
            res = res.assign(
                volume=res['vol'],
                date=pd.to_datetime(res['date']))
            res = res.drop_duplicates(subset=['date', 'code'])
            res = res.set_index('date', drop=False)
        except Exception as exc:
            # Still best effort, but reported, and KeyboardInterrupt /
            # SystemExit are no longer intercepted.
            QA_util_log_info(
                'QA Error QA_fetch_index_day failed to clean the query result: %s'
                % exc)
            res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        print(
            "QA Error QA_fetch_index_day format parameter %s is none of  \"P, p, pandas, pd , n, N, numpy !\" "
            % format)
        return None
def QA_fetch_stock_day(code,
                       start,
                       end,
                       format='numpy',
                       frequence='day',
                       collections=DATABASE.stock_day):
    """Fetch daily stock bars for one or more codes.

    The query projects ``_id`` away; see
    https://docs.mongodb.com/manual/tutorial/project-fields-from-query-results/#return-the-specified-fields-and-the-id-field-only

    Args:
        code: a stock code or list of codes (normalised by
            ``QA_util_code_tolist``).
        start: range start; validated as given, then the first 10 characters
            of ``str(start)`` are used for the query.
        end: range end, handled the same way as ``start``.
        format: output format name forwarded to
            ``QA_util_to_anyformat_from_pandas``.
        frequence: accepted for interface compatibility; not read by this
            function.
        collections: collection to query (defaults to ``DATABASE.stock_day``).

    Returns:
        The result of ``QA_util_to_anyformat_from_pandas`` for the filtered
        frame, or ``None`` when either date fails validation.
    """
    start_ok = QA_util_dateordatetime_valid(start)
    end_ok = QA_util_dateordatetime_valid(end)
    if not (start_ok & end_ok):
        QA_util_log_info(
            'QA Error QA_fetch_stock_day data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    # --- data retrieval ---
    first_day = str(start)[0:10]
    last_day = str(end)[0:10]
    code = QA_util_code_tolist(code)

    stamp_range = {
        "$lte": QA_util_date_stamp(last_day),
        "$gte": QA_util_date_stamp(first_day)
    }
    selector = {'code': {'$in': code}, "date_stamp": stamp_range}
    documents = collections.find(selector, {"_id": 0}, batch_size=10000)
    frame = pd.DataFrame(list(documents))

    # --- data cleaning (anomaly checks, index, column selection only) ---
    frame = __QA_fetch_query_filter(
        frame, DATA_QUERY_INDEX_COLUMNS_UNIQUE.STOCK_DAY, query='volume>1')

    # --- output formatting ---
    return QA_util_to_anyformat_from_pandas(data=frame, format=format)
def QA_fetch_future_min(code,
                        start,
                        end,
                        frequence='1min',
                        format='numpy',
                        collections=DATABASE.future_min):
    """Fetch futures minute bars for one or more contracts.

    Args:
        code: a contract code or list of codes; normalised with
            ``QA_util_code_tolist(code, auto_fill=False)``.
        start: range start; only the first 19 characters of ``str(start)``
            are kept (presumably ``YYYY-MM-DD HH:MM:SS`` -- confirm).
        end: range end, truncated the same way as ``start``.
        frequence: bar size; the short aliases '1m', '5m', '15m', '30m' and
            '60m' are mapped to '1min' ... '60min'. Any other value is used
            unchanged in the query.
        format: output format name forwarded to
            ``QA_util_to_anyformat_from_pandas``.
        collections: collection to query (defaults to
            ``DATABASE.future_min``).

    Returns:
        The result of ``QA_util_to_anyformat_from_pandas`` for the filtered
        frame, or ``None`` when either date fails validation.
    """
    # Map the short alias ('5m') onto the canonical name ('5min'); values
    # outside the table are left untouched.
    for canonical in ('1min', '5min', '15min', '30min', '60min'):
        if frequence in (canonical, canonical[:-2]):
            frequence = canonical
            break

    start = str(start)[0:19]
    end = str(end)[0:19]
    code = QA_util_code_tolist(code, auto_fill=False)

    if not (QA_util_dateordatetime_valid(start)
            and QA_util_dateordatetime_valid(end)):
        QA_util_log_info(
            'QA Error QA_fetch_future_min data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    cursor = collections.find(
        {
            'code': {'$in': code},
            "time_stamp": {
                "$gte": QA_util_time_stamp(start),
                "$lte": QA_util_time_stamp(end)
            },
            'type': frequence
        },
        # Drop Mongo's "_id" like the other fetchers in this file do, so the
        # ObjectId never reaches the DataFrame.
        {"_id": 0},
        batch_size=10000)
    res = pd.DataFrame(list(cursor))

    # Data cleaning only (anomaly checks, index, column selection).
    res = __QA_fetch_query_filter(
        res, DATA_QUERY_INDEX_COLUMNS_UNIQUE.FUTURE_MIN, query=None)

    return QA_util_to_anyformat_from_pandas(data=res, format=format)
def get_price(security=None,
              start_date=None,
              end_date=None,
              frequency=None,
              fields=None,
              skip_paused=False,
              fq=None,
              count=None):
    """Request price bars over HTTP and return them as a DataFrame.

    A ``get_price_period`` request is POSTed as JSON to the module-level
    ``url`` using a token obtained through ``get_config``/``get_token``.

    Args:
        security: sent as ``code`` when not ``None``.
        start_date: sent as ``date``; must pass
            ``QA_util_dateordatetime_valid``.
        end_date: sent as ``end_date``; must pass
            ``QA_util_dateordatetime_valid``.
        frequency: sent as ``unit`` when not ``None``; ``'1d'`` enables the
            daily-only column handling below.
        fields: columns to return; defaults to
            ``['open', 'close', 'high', 'low', 'volume', 'money']``.
        skip_paused: for ``'1d'`` data, keep only rows whose ``paused``
            value is ``'0'``.
        fq: ``None``/``'none'`` (no reference date), ``'pre'`` (reference
            date is ``end_date``) or ``'post'`` (reference date is
            ``start_date``).
        count: accepted for interface compatibility; not read by this
            function.

    Returns:
        ``data[fields]`` indexed by the parsed ``date`` column (index name
        ``'index'``), or ``None`` when either date fails validation.

    Raises:
        AssertionError: if ``fq`` is not one of the accepted values.
    """
    if not (QA_util_dateordatetime_valid(start_date)
            and QA_util_dateordatetime_valid(end_date)):
        # The old message formatted the undefined names ``start``/``end``
        # (NameError) and named a different function.
        QA_util_log_info(
            'QA Error get_price data parameter start=%s end=%s is not right'
            % (start_date, end_date))
        return None

    # Validate ``fq`` before any network round trip.
    if fq in [None, 'none']:
        fq_ref_date = None
    elif fq == 'pre':
        fq_ref_date = end_date
    elif fq == 'post':
        fq_ref_date = start_date
    else:
        # Raised explicitly (same exception type as the former
        # ``assert False``) so the check also holds under ``python -O``.
        raise AssertionError("fq 参数应该是下列选项之一: None, 'pre', 'post', 'none'")

    account, password = get_config(account=None, password=None, remember=False)
    token = get_token(account, password)

    body = {"method": "get_price_period", "token": token}
    optional_fields = (
        ('code', security),
        ('date', start_date),
        ('end_date', end_date),
        ('unit', frequency),
        ('fq_ref_date', fq_ref_date),
    )
    for key, value in optional_fields:
        # Identity test: ``value != None`` misbehaves for array-like input.
        if value is not None:
            body[key] = value

    response = requests.post(url, data=json.dumps(body))
    data = QA_util_to_pandas_from_RequestsResponse(response)
    data = data.set_index('date')
    data.index.name = 'index'

    if frequency == '1d':
        # NOTE(review): assumes paused/high_limit/low_limit are only present
        # in daily responses -- confirm against the remote API.
        if skip_paused:
            # Copy so the column assignments below act on an independent
            # frame rather than on a slice of the original.
            data = data[data['paused'] == '0'].copy()
        data['paused'] = data['paused'].astype('int64')
        data['high_limit'] = data['high_limit'].astype('float64')
        data['low_limit'] = data['low_limit'].astype('float64')

    if fields is None:
        fields = ['open', 'close', 'high', 'low', 'volume', 'money']

    # The response is compared against the string '0' above, so the payload
    # arrives as text; cast the standard columns to numeric types.
    for column in ('open', 'close', 'high', 'low', 'money'):
        data[column] = data[column].astype('float64')
    data['volume'] = data['volume'].astype('int64')

    data.index = pd.to_datetime(data.index)
    return data[fields]
def QA_fetch_get_future_min(code,
                            start,
                            end,
                            frequence='1min',
                            fill_data_with_tick_database=False,
                            fill_data_with_tick_online=False,
                            method='api',
                            account=None,
                            password=None,
                            remember=False):
    """Download futures minute bars from JQData and normalise them.

    Args:
        code: contract code; translated with
            ``_QA_code_toJQDATA(code, 'futures')`` for the request and
            written unchanged into the ``code`` column of the result.
        start: range start; must pass ``QA_util_dateordatetime_valid``.
        end: range end; must pass ``QA_util_dateordatetime_valid``.
        frequence: bar size; translated with ``_QA_freq_toJQDATA`` for the
            request and written unchanged into the ``type`` column.
        fill_data_with_tick_database: accepted for interface compatibility;
            not read by this function.
        fill_data_with_tick_online: accepted for interface compatibility;
            not read by this function.
        method: ``'api'`` (log in with ``JQDATA_login`` and use
            ``jqdatasdk``) or ``'http'`` (use ``jqdatahttp``).
        account: forwarded to ``JQDATA_login`` when ``method == 'api'``.
        password: forwarded to ``JQDATA_login`` when ``method == 'api'``.
        remember: forwarded to ``JQDATA_login`` when ``method == 'api'``.

    Returns:
        The result of
        ``select_DataAggrement(DATABASE_NAME.FUTURE_MIN)(DATASOURCE.JQDATA, data)``.

    Raises:
        AssertionError: if ``start`` or ``end`` fails validation.
        ValueError: if ``method`` is neither ``'api'`` nor ``'http'``.
    """
    # Raised explicitly (same type and message as the former ``assert``
    # statements) so validation is not stripped under ``python -O``.
    if not QA_util_dateordatetime_valid(start):
        raise AssertionError('start input format error')
    if not QA_util_dateordatetime_valid(end):
        raise AssertionError('end input format error')
    if method not in ('api', 'http'):
        # Previously fell through and failed later with UnboundLocalError
        # on ``data``.
        raise ValueError(
            "method must be 'api' or 'http', got %r" % (method,))

    jqcode = _QA_code_toJQDATA(code, 'futures')
    jqfrequence = _QA_freq_toJQDATA(frequence)

    # Both back ends are called with exactly the same arguments.
    request = dict(
        security=jqcode,
        start_date=start,
        end_date=end,
        frequency=jqfrequence,
        fields=None,
        skip_paused=True,
        fq=None,
        count=None)

    if method == 'api':
        # The SDK session is left logged in; no logout call is made here.
        JQDATA_login(account=account, password=password, remember=remember)
        data = jqdatasdk.get_price(**request)
    else:
        data = jqdatahttp.get_price(**request)

    # Tag rows with the caller's code and frequency, not the JQData forms.
    data['code'] = code
    data['type'] = frequence
    return select_DataAggrement(DATABASE_NAME.FUTURE_MIN)(DATASOURCE.JQDATA,
                                                          data)
def QA_fetch_stock_full(date, format='numpy', collections=DATABASE.stock_day):
    """Fetch every stock's daily bar for a single day.

    Args:
        date: the day to fetch; validated as given, then the first 10
            characters of ``str(date)`` are converted with
            ``QA_util_date_stamp`` for the query.
        format: 'n'/'N'/'numpy', 'list'/'l'/'L' or 'P'/'p'/'pandas'/'pd'.
        collections: collection to query (defaults to ``DATABASE.stock_day``).

    Returns:
        Rows of ``[code, open, high, low, close, vol, date]`` as a numpy
        array, a list of lists, or a DataFrame indexed by the parsed
        ``date`` (with the ``vol`` field exposed as column ``volume``).
        For an unknown ``format`` an error is printed and the plain list is
        returned. ``None`` is returned when ``date`` fails validation.
    """
    day = str(date)[0:10]
    if not QA_util_dateordatetime_valid(date):
        QA_util_log_info(
            'QA Error QA_fetch_stock_full data parameter date=%s not right'
            % date)
        return None

    rows = [
        [
            str(doc['code']),
            float(doc['open']),
            float(doc['high']),
            float(doc['low']),
            float(doc['close']),
            float(doc['vol']),
            doc['date']
        ]
        for doc in collections.find({"date_stamp": QA_util_date_stamp(day)},
                                    batch_size=10000)
    ]

    # Several output formats are supported.
    if format in ('n', 'N', 'numpy'):
        return numpy.asarray(rows)
    if format in ('list', 'l', 'L'):
        return rows
    if format in ('P', 'p', 'pandas', 'pd'):
        frame = DataFrame(
            rows,
            columns=['code', 'open', 'high', 'low', 'close', 'volume', 'date'])
        frame['date'] = pd.to_datetime(frame['date'])
        return frame.set_index('date', drop=False)

    print(
        "QA Error QA_fetch_stock_full format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
        % format)
    return rows
def QA_fetch_stock_transaction(code,
                               start,
                               end,
                               format='numpy',
                               frequence=None,
                               collections=DATABASE.stock_transaction):
    """Fetch stock tick (transaction) data, optionally resampled to bars.

    The query projects ``_id`` away; see
    https://docs.mongodb.com/manual/tutorial/project-fields-from-query-results/#return-the-specified-fields-and-the-id-field-only

    Args:
        code: a stock code or list of codes (normalised by
            ``QA_util_code_tolist``).
        start: range start, converted with ``QA_util_time_stamp``.
        end: range end, converted with ``QA_util_time_stamp``.
        format: output format name forwarded to
            ``QA_util_to_anyformat_from_pandas``; only applied when
            ``frequence`` is ``None`` and rows were found.
        frequence: ``None`` for raw ticks, ``'1min'`` for 1-minute bars, or
            one of '5min'/'15min'/'30min'/'60min' for bars resampled from
            the 1-minute bars. Any other value leaves the ticks untouched.
        collections: collection to query (defaults to
            ``DATABASE.stock_transaction``).

    Returns:
        * ``None`` when either date fails validation;
        * the (empty) DataFrame as queried when no rows match;
        * the filtered ticks converted to ``format`` when ``frequence`` is
          ``None``;
        * otherwise the resampled (or, for an unrecognised ``frequence``,
          the raw) data as returned by the helpers, without format
          conversion.
    """
    start_ok = QA_util_dateordatetime_valid(start)
    end_ok = QA_util_dateordatetime_valid(end)
    if not (start_ok & end_ok):
        QA_util_log_info(
            'QA Error QA_fetch_stock_transaction data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    # --- data retrieval ---
    code = QA_util_code_tolist(code)
    stamp_range = {
        "$lte": QA_util_time_stamp(end),
        "$gte": QA_util_time_stamp(start)
    }
    selector = {'code': {'$in': code}, "time_stamp": stamp_range}
    ticks = pd.DataFrame(
        list(collections.find(selector, {"_id": 0}, batch_size=10000)))

    if len(ticks) == 0:
        return ticks

    if frequence is None:
        # Raw ticks: cleaning only (anomaly checks, index, column
        # selection), then conversion to the requested format.
        cleaned = __QA_fetch_query_filter(
            ticks,
            DATA_QUERY_INDEX_COLUMNS_UNIQUE.STOCK_TRANSACTION,
            query='volume>1')
        return QA_util_to_anyformat_from_pandas(data=cleaned, format=format)

    # Resampling switch is on: turn tick data into minute bars.
    if frequence == '1min':
        return QA_data_stocktick_resample_1min(
            ticks, '1min', 'database_tick_resample', True)

    if frequence in ['5min', '15min', '30min', '60min']:
        one_minute = QA_data_stocktick_resample_1min(
            ticks, '1min', 'database_tick_resample', True)
        return QA_data_min_resample_stock(one_minute, frequence,
                                          'database_tick1min_resample')

    # Unrecognised frequency: hand the ticks back as queried.
    return ticks