def __gen_param(stock_list, coll_stock_day, ip_list=None):
    """Build per-stock download-parameter tuples for the STOCK_DAY save job.

    For each code, look up the last saved date in ``coll_stock_day`` and
    emit an incremental task (or a full download from 1990-01-01 when the
    code has no rows yet), spreading tasks round-robin across ``ip_list``.

    NOTE(review): ``ui_log``, ``ui_progress`` and ``err`` are free names —
    presumably closure/module variables of the enclosing save routine.

    Args:
        stock_list: iterable of stock codes to schedule.
        coll_stock_day: MongoDB collection holding saved daily bars.
        ip_list: list of ``{'ip': ..., 'port': ...}`` TDX server dicts.
            Defaults to an empty list (was a mutable default argument).

    Returns:
        list of parameter tuples consumed by the parallel fetch workers.
    """
    ip_list = [] if ip_list is None else ip_list  # avoid mutable default
    results = []
    count = len(ip_list)
    total = len(stock_list)
    for item, code in enumerate(stock_list):
        try:
            QA_util_log_info(
                '##JOB01 Now Saving STOCK_DAY=== {}'.format(str(code)),
                ui_log
            )
            # First check whether the database already has data for this code
            search_cond = {'code': str(code)[0:6]}
            ref = coll_stock_day.find(search_cond)
            end_date = str(now_time())[0:10]
            ref_count = coll_stock_day.count_documents(search_cond)
            # If rows exist, continue incrementally.  The count guard exists
            # because a freshly listed stock has no rows, and indexing the
            # cursor with a negative position would fail.
            if ref_count > 0:
                # resume from the last saved date
                start_date = ref[ref_count - 1]['date']
            else:
                # no data for this code yet: download everything from 1990-01-01
                start_date = '1990-01-01'
            QA_util_log_info(
                'UPDATE_STOCK_DAY \n Trying updating {} from {} to {}'
                .format(code, start_date, end_date),
                ui_log
            )
            if start_date != end_date:  # already up to date -> skip
                results.append(
                    (code, start_date, end_date, '00', 'day',
                     ip_list[item % count]['ip'],
                     ip_list[item % count]['port'],
                     item, total, ui_log, ui_progress)
                )
        except Exception as error0:
            print('Exception:{}'.format(error0))
            err.append(code)
    return results
def __gen_param(stock_list, coll_stock_day, ip_list=None):
    """Build per-stock download-parameter tuples for the STOCK_DAY save job.

    For each code, look up the last saved date in ``coll_stock_day`` and
    emit an incremental task (or a full download from 1990-01-01 when the
    code has no rows yet), spreading tasks round-robin across ``ip_list``.

    NOTE(review): ``ui_log``, ``ui_progress`` and ``err`` are free names —
    presumably closure/module variables of the enclosing save routine.

    Args:
        stock_list: iterable of stock codes to schedule.
        coll_stock_day: MongoDB collection holding saved daily bars.
        ip_list: list of ``{'ip': ..., 'port': ...}`` TDX server dicts.
            Defaults to an empty list (was a mutable default argument).

    Returns:
        list of parameter tuples consumed by the parallel fetch workers.
    """
    ip_list = [] if ip_list is None else ip_list  # avoid mutable default
    results = []
    count = len(ip_list)
    total = len(stock_list)
    for item, code in enumerate(stock_list):
        try:
            QA_util_log_info(
                '##JOB01 Now Saving STOCK_DAY==== {}'.format(str(code)),
                ui_log
            )
            # First check whether the database already has data for this code
            search_cond = {'code': str(code)[0:6]}
            ref = coll_stock_day.find(search_cond)
            end_date = str(now_time())[0:10]
            ref_count = coll_stock_day.count_documents(search_cond)
            # If rows exist, continue incrementally.  The count guard exists
            # because a freshly listed stock has no rows, and indexing the
            # cursor with a negative position would fail.
            if ref_count > 0:
                # resume from the last saved date
                start_date = ref[ref_count - 1]['date']
            else:
                # no data for this code yet: download everything from 1990-01-01
                start_date = '1990-01-01'
            QA_util_log_info(
                'UPDATE_STOCK_DAY \n Trying updating {} from {} to {}'
                .format(code, start_date, end_date),
                ui_log
            )
            if start_date != end_date:  # already up to date -> skip
                results.append(
                    (code, start_date, end_date, '00', 'day',
                     ip_list[item % count]['ip'],
                     ip_list[item % count]['port'],
                     item, total, ui_log, ui_progress)
                )
        except Exception as error0:
            print('Exception:{}'.format(error0))
            err.append(code)
    return results
def __saving_work(self, code):
    """Incrementally save index/ETF daily bars for ``code`` into MongoDB.

    Resumes from the last stored date when the code already has rows;
    otherwise downloads from 1990-01-01, retrying from 2009-01-01 on
    failure.  Errors are logged and the code is appended to ``self.err``.
    """

    def __QA_log_info(code, end_time, start_time):

        def loginfo(prefix='', astr='', listCounts=5):
            # Buffer log fragments; flush one combined line every
            # ``listCounts`` entries to keep the UI log readable.
            if len(self._loginfolist) < listCounts:
                self._loginfolist.append(astr)
            else:
                # was: accumulation into a variable named ``str``,
                # shadowing the builtin
                merged = ''.join(
                    entry + ' ' for entry in self._loginfolist) + astr
                QA_util_log_info(prefix.format(merged), self.ui_log)
                self._loginfolist.clear()

        index_or_etf = self.get_index_or_etf_from_code(code)
        prefix = '##JOB04 Saving {}_DAY ==== Trying updating\n{}'.format(
            index_or_etf, '{}')
        loginfo(prefix,
                ' {} from {} to {}'.format(code, start_time, end_time))

    try:
        search_cond = {'code': str(code)[0:6]}
        ref_ = get_coll().find(search_cond)
        ref_count = get_coll().count_documents(search_cond)
        end_time = str(now_time())[0:10]
        if ref_count > 0:
            # rows exist: resume from the last saved date
            start_time = ref_[ref_count - 1]['date']
            __QA_log_info(code, end_time, start_time)
            if start_time != end_time:
                get_coll().insert_many(
                    QA_util_to_json_from_pandas(
                        QA_fetch_get_index_day(
                            str(code),
                            QA_util_get_next_day(start_time),
                            end_time)))
        else:
            try:
                # first save: pull the full history
                start_time = '1990-01-01'
                __QA_log_info(code, end_time, start_time)
                get_coll().insert_many(
                    QA_util_to_json_from_pandas(
                        QA_fetch_get_index_day(str(code), start_time,
                                               end_time)))
            except Exception:
                # presumably instruments with no pre-2009 history — retry
                # with a later start date
                start_time = '2009-01-01'
                __QA_log_info(code, end_time, start_time)
                get_coll().insert_many(
                    QA_util_to_json_from_pandas(
                        QA_fetch_get_index_day(str(code), start_time,
                                               end_time)))
    except Exception as e:
        QA_util_log_info(e, ui_log=self.ui_log)
        self.err.append(str(code))
        QA_util_log_info(self.err, ui_log=self.ui_log)
def QA_quotation_adv(code, start, end=None, frequence='1min',
                     market=MARKET_TYPE.STOCK_CN, source=DATASOURCE.AUTO,
                     output=OUTPUT_FORMAT.DATAFRAME):
    """Unified k-line fetcher.

    With ``source=DATASOURCE.AUTO`` the local MongoDB is preferred, and any
    missing tail is completed online from TDX (stocks only).

    Arguments:
        code {str/list} -- future/stock code(s)
        start {str} -- start date
        end {str} -- end date; defaults to "now", resolved at call time
            (a ``save_tdx.now_time()`` default in the signature would be
            frozen once at import time — that was a bug)
        frequence {enum} -- frequency, QA.FREQUENCE
        market {enum} -- market, QA.MARKET_TYPE
        source {enum} -- data source, QA.DATASOURCE
        output {enum} -- output type, QA.OUTPUT_FORMAT
    """
    if end is None:
        end = save_tdx.now_time()
    if pd.Timestamp(end) > pd.Timestamp(save_tdx.now_time()):
        end = save_tdx.now_time()
    res = None
    if market == MARKET_TYPE.STOCK_CN:
        if frequence == FREQUENCE.DAY or frequence == FREQUENCE.WEEK:
            if source == DATASOURCE.AUTO:
                try:
                    # QA_fetch_stock_day_adv returns a QA_DataStruct_Stock_day;
                    # reset the 'code' level so the frame matches the online
                    # (single-index) format
                    res = QAQueryAdv.QA_fetch_stock_day_adv(
                        code, start, end).data.reset_index(level='code')
                    start_date = res.index[-1]
                    end_date = pd.Timestamp(end)
                    if end_date - start_date > datetime.timedelta(hours=17):
                        # complete the tail from TDX; stocks only, and no DB
                        # writes here — persisting is still done via save
                        data_tdx = QATdx.QA_fetch_get_stock_day(
                            code,
                            QA_util_get_next_period(start_date, frequence),
                            end_date, '00')
                        # normalize the TDX frame to the DB schema
                        data_tdx = data_tdx.rename(
                            columns={"vol": "volume"}).drop(
                                ['date', 'date_stamp'], axis=1)
                        data_tdx.index = pd.to_datetime(data_tdx.index)
                        res = pd.concat([res, data_tdx], sort=True)
                    res = QA_DataStruct_Stock_day(
                        res.reset_index().set_index(['date', 'code']))
                except Exception:
                    res = None  # fall through to the TDX branch below
            if source == DATASOURCE.MONGO:
                try:
                    res = QAQueryAdv.QA_fetch_stock_day_adv(code, start, end)
                except Exception:
                    res = None
            if source == DATASOURCE.TDX or res is None:
                res = QATdx.QA_fetch_get_stock_day(code, start, end, '00')
                res = QA_DataStruct_Stock_day(res.set_index(['date', 'code']))
            elif source == DATASOURCE.TUSHARE:
                res = QATushare.QA_fetch_get_stock_day(code, start, end, '00')
            if frequence == FREQUENCE.WEEK:
                res = QA_DataStruct_Stock_day(QA_data_day_resample(res.data))
        elif frequence in [FREQUENCE.ONE_MIN, FREQUENCE.FIVE_MIN,
                           FREQUENCE.FIFTEEN_MIN, FREQUENCE.THIRTY_MIN,
                           FREQUENCE.SIXTY_MIN]:
            if source == DATASOURCE.AUTO:
                try:
                    # single-index frame, matching the online format
                    res = QAQueryAdv.QA_fetch_stock_min_adv(
                        code, start, end,
                        frequence=frequence).data.reset_index(level='code')
                    start_date = res.index[-1]
                    end_date = pd.Timestamp(end)
                    if end_date > start_date:
                        # complete the tail from TDX; no DB writes here
                        data_tdx = QATdx.QA_fetch_get_stock_min(
                            code,
                            QA_util_get_next_period(start_date, frequence),
                            end_date, frequence=frequence)
                        # normalize the TDX frame to the DB schema
                        data_tdx = data_tdx.rename(
                            columns={"vol": "volume"}).drop(
                                ['date', 'datetime', 'date_stamp',
                                 'time_stamp'], axis=1)
                        data_tdx.index = pd.to_datetime(data_tdx.index)
                        res = pd.concat([res, data_tdx], sort=True)
                    # BUG FIX: this previously wrapped minute bars in
                    # QA_DataStruct_Stock_day; minute data belongs in
                    # QA_DataStruct_Stock_min (as the other branches do)
                    res = QA_DataStruct_Stock_min(
                        res.reset_index().set_index(['datetime', 'code']))
                except Exception:
                    res = None
            if source == DATASOURCE.MONGO:
                try:
                    res = QAQueryAdv.QA_fetch_stock_min_adv(
                        code, start, end, frequence=frequence
                    )
                except Exception:
                    res = None
            if source == DATASOURCE.TDX or res is None:
                res = QATdx.QA_fetch_get_stock_min(
                    code, start, end, frequence=frequence
                )
                res = QA_DataStruct_Stock_min(
                    res.set_index(['datetime', 'code'])
                )
    elif market == MARKET_TYPE.FUTURE_CN:
        if frequence == FREQUENCE.DAY:
            if source == DATASOURCE.MONGO:
                try:
                    res = QAQueryAdv.QA_fetch_future_day_adv(code, start, end)
                except Exception:
                    res = None
            if source == DATASOURCE.TDX or res is None:
                res = QATdx.QA_fetch_get_future_day(code, start, end)
                res = QA_DataStruct_Future_day(res.set_index(['date', 'code']))
        elif frequence in [FREQUENCE.ONE_MIN, FREQUENCE.FIVE_MIN,
                           FREQUENCE.FIFTEEN_MIN, FREQUENCE.THIRTY_MIN,
                           FREQUENCE.SIXTY_MIN]:
            if source == DATASOURCE.MONGO:
                try:
                    res = QAQueryAdv.QA_fetch_future_min_adv(
                        code, start, end, frequence=frequence
                    )
                except Exception:
                    res = None
            if source == DATASOURCE.TDX or res is None:
                res = QATdx.QA_fetch_get_future_min(
                    code, start, end, frequence=frequence
                )
                res = QA_DataStruct_Future_min(
                    res.set_index(['datetime', 'code'])
                )
    elif market == MARKET_TYPE.INDEX_CN:
        if frequence == FREQUENCE.DAY:
            if source == DATASOURCE.MONGO:
                try:
                    res = QAQueryAdv.QA_fetch_index_day_adv(code, start, end)
                except Exception:
                    # BUG FIX: previously `return None`, which skipped the
                    # TDX fallback and the output-format stage entirely
                    res = None
            if source == DATASOURCE.TDX or res is None:
                res = QATdx.QA_fetch_get_index_day(code, start, end)
                res = QA_DataStruct_Index_day(res.set_index(['date', 'code']))
        elif frequence in [FREQUENCE.ONE_MIN, FREQUENCE.FIVE_MIN,
                           FREQUENCE.FIFTEEN_MIN, FREQUENCE.THIRTY_MIN,
                           FREQUENCE.SIXTY_MIN]:
            if source == DATASOURCE.MONGO:
                try:
                    res = QAQueryAdv.QA_fetch_index_min_adv(
                        code, start, end, frequence=frequence
                    )
                except Exception:
                    res = None
            if source == DATASOURCE.TDX or res is None:
                res = QATdx.QA_fetch_get_index_min(
                    code, start, end, frequence=frequence
                )
                res = QA_DataStruct_Index_min(
                    res.set_index(['datetime', 'code'])
                )
    elif market == MARKET_TYPE.OPTION_CN:
        if source == DATASOURCE.MONGO:
            # res = QAQueryAdv.QA_fetch_option_day_adv(code, start, end)
            raise NotImplementedError('CURRENT NOT FINISH THIS METHOD')
    # convert the DataStruct into the requested output format
    if output is OUTPUT_FORMAT.DATAFRAME:
        return res.data
    elif output is OUTPUT_FORMAT.DATASTRUCT:
        return res
    elif output is OUTPUT_FORMAT.NDARRAY:
        return res.to_numpy()
    elif output is OUTPUT_FORMAT.JSON:
        return res.to_json()
    elif output is OUTPUT_FORMAT.LIST:
        return res.to_list()
def _checkQA_fetch_stock_xdxr(self, codes):
    """Cross-check that stock_day and stock_adj hold the same record counts.

    When the counts differ, the stock_day and stock_adj documents for that
    code are deleted so they can be re-saved.

    NOTE: the root cause of the mismatches has not been identified yet.
    Sample output:
        和原始数据对不上:000001        date
        345   1993-06-04
        1568  1998-06-20
        6673 documents deleted(记录被删除) stock_adj.
        6673 documents deleted(记录被删除) stock_day.
        和原始数据对不上:000002        date
        1561  1998-06-20
        6615 documents deleted(记录被删除) stock_adj.
        6615 documents deleted(记录被删除) stock_day.
        000005 ok 5909
        000006 ok 6583
        000011 ok 6565
    """
    data1 = QA.QA_fetch_stock_xdxr(codes)
    self.assertTrue(len(data1) >= 0, "未保存数据")
    if len(data1) > 0:
        print(set(data1['code']))
    # after saving xdxr, the xdxr and daily-bar record counts should agree
    start = '1990-01-01'
    end_time = str(now_time())[0:10]
    for code in codes:
        try:
            data = qa.QA_fetch_stock_day_adv(code, start, end_time).data
        except Exception:
            print("{} 本地无数据".format(code))
            # BUG FIX: previously fell through with `data` unbound; the
            # NameError below was then swallowed by the broad except
            continue
        try:
            dataAdj = qa.QA_fetch_stock_adj(code, start, end_time)
            df1 = pd.DataFrame(data.index.levels[0])
            df2 = pd.DataFrame(dataAdj.index)
            # concat + drop_duplicates(keep=False) leaves only the dates
            # present in exactly one of the two frames
            df = pd.concat([df1, df2]).drop_duplicates(keep=False)
            if len(df) > 0:
                print("和原始数据对不上:{}".format(code), df)
                df = df[df['date'] > '2000-01-01']
                if len(df) == 0:
                    # mismatch predates 2000 -> ignore
                    continue
                # mismatch: drop stock_day and stock_adj for this code
                table = DATABASE.stock_adj
                self._delTableDocument(table, code)
                table = DATABASE.stock_day
                self._delTableDocument(table, code)
            else:
                print("{} ok {}".format(code, len(data)))
                self.assertTrue(
                    data.iloc[-1].name[0] == dataAdj.iloc[-1].date,
                    "最后日期不匹配,是否未保存xdxr?")
        except Exception:
            # stock_adj holds no data for this code
            print("跳过 {}".format(code))
def _saving_work(self, code):
    """Save xdxr (dividend/split) records and qfq adjust factors for ``code``.

    First incrementally persists new xdxr rows into the ``stock_xdxr``
    collection, then recomputes the forward-adjusted (qfq) factor series
    and stores it in ``stock_adj``, re-saving everything when the
    previously stored tail no longer matches the recomputed data.
    """

    def __QA_log_info(code, end_time, start_time):

        def loginfo(prefix='', astr='', listCounts=5):
            # Buffer log fragments; flush one combined line every
            # ``listCounts`` entries.
            if len(self._loginfolist) < listCounts:
                self._loginfolist.append(astr)
            else:
                # was: accumulation into a variable named ``str``,
                # shadowing the builtin
                merged = ''.join(
                    entry + ' ' for entry in self._loginfolist) + astr
                QA_util_log_info(prefix.format(merged), self.ui_log)
                self._loginfolist.clear()

        prefix = '##JOB02 Saving parallelism {}_DAY ==== Trying updating\n{}'.format(
            "xdxr", '{}')
        loginfo(prefix,
                ' {} from {} to {}'.format(code, start_time, end_time))

    try:
        search_cond = {'code': str(code)[0:6]}
        _col = get_coll(client=None, cacheName="stock_xdxr",
                        tableName="stock_xdxr")
        ref_ = _col.find(search_cond)
        ref_count = _col.count_documents(search_cond)
        end_time = str(now_time())[0:10]
        _xdxr = QA_fetch_get_stock_xdxr(str(code))
        if ref_count > 0:
            # rows exist: resume from the last saved date
            start_time = ref_[ref_count - 1]['date']
            __QA_log_info(code, end_time, start_time)
            if start_time != end_time:
                # only the records newer than the last saved one
                xdxrdata = _xdxr[_xdxr['date'] > start_time]
                if len(xdxrdata) > 0:
                    _col.insert_many(QA_util_to_json_from_pandas(xdxrdata),
                                     ordered=False)
        else:
            # first save for this code
            try:
                start_time = '1990-01-01'
                __QA_log_info(code, end_time, start_time)
                _col.insert_many(QA_util_to_json_from_pandas(_xdxr),
                                 ordered=False)
            except Exception:
                # best-effort first save: insert errors are deliberately
                # ignored here (e.g. duplicate keys with ordered=False)
                pass
    except Exception as e:
        QA_util_log_info(e.args, ui_log=self.ui_log)
        self.err.append(str(code))
        QA_util_log_info(self.err, ui_log=self.ui_log)
    try:
        # persist the qfq (forward-adjusted) factor series
        # NOTE(review): ``search_cond``/``end_time``/``_xdxr`` come from the
        # try block above; if that block failed early they are unbound and
        # the NameError is caught by the except below — confirm intended
        coll_adj = get_coll(client=None, cacheName="stock_adj",
                            tableName="stock_adj")
        data = QA_fetch_stock_day(str(code), '1990-01-01',
                                  str(datetime.date.today()), 'pd')
        qfq = _QA_data_stock_to_fq(data, _xdxr, 'qfq')
        qfq = qfq.assign(date=qfq.date.apply(lambda x: str(x)[0:10]))
        adjdata = QA_util_to_json_from_pandas(
            qfq.loc[:, ['date', 'code', 'adj']])
        # if nothing changed, only insert the new tail
        ref_ = coll_adj.find(search_cond)
        ref_count = coll_adj.count_documents(search_cond)
        if ref_count > 0:
            # previously saved: check the last stored document still matches
            lastref = ref_[ref_count - 1]
            del lastref['_id']
            try:
                # list.index raises ValueError when lastref is not found in
                # the recomputed data, which routes us to the full re-save
                # in the except branch
                adjdata.index(lastref)
                start_time = lastref['date']
                adjdata2 = QA_util_to_json_from_pandas(qfq.loc[slice(
                    pd.Timestamp(start_time),
                    pd.Timestamp(end_time))].loc[:, ['date', 'code',
                                                     'adj']][1:])
                if (ref_count + len(adjdata2)) == len(adjdata):
                    # if len(adjdata2) > 0:
                    coll_adj.insert_many(adjdata2)
                else:
                    raise Exception("数据总量不匹配,重新存储")
            except Exception:
                # dividend data changed: drop the old rows and re-save all
                coll_adj.delete_many({'code': code})
                coll_adj.insert_many(adjdata)
        else:
            # first qfq save for this code
            coll_adj.insert_many(adjdata)
    except Exception as e:
        print(e)