Example #1
    def __gen_param(stock_list, coll_stock_day, ip_list=None):
        """Build download-task parameter tuples for every stock code.

        For each code, look up the last saved date in ``coll_stock_day`` and,
        for codes that still need updating, emit
        ``(code, start_date, end_date, '00', 'day', ip, port, item, total,
        ui_log, ui_progress)``.
        """
        # BUGFIX: the original used a mutable default argument (`ip_list=[]`),
        # which is shared across calls; use a None sentinel instead.
        if ip_list is None:
            ip_list = []
        results = []
        count = len(ip_list)
        total = len(stock_list)
        for item, code in enumerate(stock_list):
            try:
                QA_util_log_info(
                    '##JOB01 Now Saving STOCK_DAY=== {}'.format(str(code)),
                    ui_log
                )

                # First check whether the database already has data for this
                # code.
                search_cond = {'code': str(code)[0:6]}
                ref = coll_stock_day.find(search_cond)
                end_date = str(now_time())[0:10]
                ref_count = coll_stock_day.count_documents(search_cond)

                if ref_count > 0:
                    # Database already holds this code: resume incrementally
                    # from the last saved date. (Guarded so a freshly listed
                    # stock with no rows does not hit a negative index.)
                    start_date = ref[ref_count - 1]['date']
                else:
                    # No local data for this code yet: download the full
                    # history starting from 1990-01-01.
                    start_date = '1990-01-01'
                QA_util_log_info(
                    'UPDATE_STOCK_DAY \n Trying updating {} from {} to {}'
                        .format(code,
                                start_date,
                                end_date),
                    ui_log
                )
                if start_date != end_date:
                    # Codes whose dates match are already up to date; skip.
                    results.append((code, start_date, end_date, '00', 'day',
                                    ip_list[item % count]['ip'],
                                    ip_list[item % count]['port'], item,
                                    total, ui_log, ui_progress))
            except Exception as error0:
                print('Exception:{}'.format(error0))
                err.append(code)
        return results
Example #2
    def __gen_param(stock_list, coll_stock_day, ip_list=None):
        """Build download-task parameter tuples for every stock code.

        For each code, look up the last saved date in ``coll_stock_day`` and,
        for codes that still need updating, emit
        ``(code, start_date, end_date, '00', 'day', ip, port, item, total,
        ui_log, ui_progress)``.
        """
        # BUGFIX: the original used a mutable default argument (`ip_list=[]`),
        # which is shared across calls; use a None sentinel instead.
        if ip_list is None:
            ip_list = []
        results = []
        count = len(ip_list)
        total = len(stock_list)
        for item, code in enumerate(stock_list):
            try:
                QA_util_log_info(
                    '##JOB01 Now Saving STOCK_DAY==== {}'.format(str(code)),
                    ui_log
                )

                # First check whether the database already has data for this
                # code.
                search_cond = {'code': str(code)[0:6]}
                ref = coll_stock_day.find(search_cond)
                end_date = str(now_time())[0:10]
                ref_count = coll_stock_day.count_documents(search_cond)

                if ref_count > 0:
                    # Database already holds this code: resume incrementally
                    # from the last saved date. (Guarded so a freshly listed
                    # stock with no rows does not hit a negative index.)
                    start_date = ref[ref_count - 1]['date']
                else:
                    # No local data for this code yet: download the full
                    # history starting from 1990-01-01.
                    start_date = '1990-01-01'
                QA_util_log_info(
                    'UPDATE_STOCK_DAY \n Trying updating {} from {} to {}'
                        .format(code,
                                start_date,
                                end_date),
                    ui_log
                )
                if start_date != end_date:
                    # Codes whose dates match are already up to date; skip.
                    results.append((code, start_date, end_date, '00', 'day',
                                    ip_list[item % count]['ip'],
                                    ip_list[item % count]['port'], item,
                                    total, ui_log, ui_progress))
            except Exception as error0:
                print('Exception:{}'.format(error0))
                err.append(code)
        return results
Example #3
    def __saving_work(self, code):
        """Save daily index/ETF bars for ``code``, updating incrementally.

        Resumes from the last saved date when the collection already has
        rows for the code; otherwise downloads the full history (falling
        back to 2009-01-01 when the 1990 download fails).
        """

        def __QA_log_info(code, end_time, start_time):
            def loginfo(prefix='', astr='', listCounts=5):
                # Buffer up to `listCounts` messages, then flush them as one
                # combined log line.
                if len(self._loginfolist) < listCounts:
                    self._loginfolist.append(astr)
                else:
                    # FIX: the original accumulated into a local named `str`,
                    # shadowing the builtin; join the buffer instead.
                    combined = ''.join(
                        s + ' ' for s in self._loginfolist) + astr
                    QA_util_log_info(prefix.format(combined), self.ui_log)
                    self._loginfolist.clear()

            index_or_etf = self.get_index_or_etf_from_code(code)
            prefix = '##JOB04 Saving {}_DAY ==== Trying updating\n{}'.format(
                index_or_etf, '{}')
            loginfo(prefix, ' {} from {} to {}'.format(code, start_time,
                                                       end_time))

        try:
            search_cond = {'code': str(code)[0:6]}
            ref_ = get_coll().find(search_cond)
            ref_count = get_coll().count_documents(search_cond)

            end_time = str(now_time())[0:10]
            if ref_count > 0:
                # Rows exist already: resume from the last saved date.
                start_time = ref_[ref_count - 1]['date']

                __QA_log_info(code, end_time, start_time)

                if start_time != end_time:
                    get_coll().insert_many(
                        QA_util_to_json_from_pandas(
                            QA_fetch_get_index_day(
                                str(code), QA_util_get_next_day(start_time),
                                end_time)))
            else:
                try:
                    # No local data: download the full history.
                    start_time = '1990-01-01'
                    __QA_log_info(code, end_time, start_time)
                    get_coll().insert_many(
                        QA_util_to_json_from_pandas(
                            QA_fetch_get_index_day(str(code), start_time,
                                                   end_time)))
                except Exception:
                    # Some instruments have no data back to 1990; retry with
                    # a later start date.
                    start_time = '2009-01-01'
                    __QA_log_info(code, end_time, start_time)
                    get_coll().insert_many(
                        QA_util_to_json_from_pandas(
                            QA_fetch_get_index_day(str(code), start_time,
                                                   end_time)))
        except Exception as e:
            QA_util_log_info(e, ui_log=self.ui_log)
            self.err.append(str(code))
            QA_util_log_info(self.err, ui_log=self.ui_log)
Example #4
File: Fetcher.py  Project: zwcdp/QUANTAXIS
def QA_quotation_adv(code, start, end=None, frequence='1min',
                     market=MARKET_TYPE.STOCK_CN, source=DATASOURCE.AUTO,
                     output=OUTPUT_FORMAT.DATAFRAME):
    """Unified K-line fetcher.

    With source=DATASOURCE.AUTO, mongo (the local database) is preferred and
    data not yet downloaded into mongo is completed online from TDX
    (stocks only).

    Arguments:
        code {str/list} -- future/stock code(s)
        start {str} -- start date
        end {str} -- end date; defaults to "now", resolved per call
        frequence {enum} -- frequency QA.FREQUENCE
        market {enum} -- market QA.MARKET_TYPE
        source {enum} -- data source QA.DATASOURCE
        output {enum} -- output format QA.OUTPUT_FORMAT
    """
    # BUGFIX: the original default `end=save_tdx.now_time()` was evaluated
    # once at import time, so long-running processes silently queried a
    # stale "now". Resolve the current time per call via a None sentinel.
    if end is None:
        end = save_tdx.now_time()
    # Never query into the future.
    if pd.Timestamp(end) > pd.Timestamp(save_tdx.now_time()):
        end = save_tdx.now_time()
    res = None
    if market == MARKET_TYPE.STOCK_CN:
        if frequence == FREQUENCE.DAY or frequence == FREQUENCE.WEEK:
            if source == DATASOURCE.AUTO:
                try:
                    # QA_fetch_stock_day_adv returns a
                    # QA_DataStruct_Stock_day; drop 'code' to a column so the
                    # frame matches the layout of the online data.
                    res = QAQueryAdv.QA_fetch_stock_day_adv(
                        code, start, end).data.reset_index(level='code')
                    start_date = res.index[-1]
                    end_date = pd.Timestamp(end)
                    if end_date - start_date > datetime.timedelta(hours=17):
                        # Top up from TDX. Only single stocks are considered
                        # here and nothing is written back -- persisting
                        # still requires an explicit save.
                        data_tdx = QATdx.QA_fetch_get_stock_day(
                            code,
                            QA_util_get_next_period(start_date, frequence),
                            end_date, '00')
                        # Align the TDX frame with the database layout.
                        data_tdx = data_tdx.rename(
                            columns={"vol": "volume"}).drop(
                                ['date', 'date_stamp'], axis=1)
                        data_tdx.index = pd.to_datetime(data_tdx.index)
                        res = pd.concat([res, data_tdx], sort=True)
                    res = QA_DataStruct_Stock_day(
                        res.reset_index().set_index(['date', 'code']))
                except Exception:
                    res = None
            if source == DATASOURCE.MONGO:
                try:
                    res = QAQueryAdv.QA_fetch_stock_day_adv(code, start, end)
                except Exception:
                    res = None
            if source == DATASOURCE.TDX or res is None:
                res = QATdx.QA_fetch_get_stock_day(code, start, end, '00')
                res = QA_DataStruct_Stock_day(res.set_index(['date', 'code']))
            elif source == DATASOURCE.TUSHARE:
                res = QATushare.QA_fetch_get_stock_day(code, start, end, '00')
            if frequence == FREQUENCE.WEEK:
                # Weekly bars are resampled from daily bars.
                res = QA_DataStruct_Stock_day(
                    QA_data_day_resample(res.data))
        elif frequence in [FREQUENCE.ONE_MIN, FREQUENCE.FIVE_MIN,
                           FREQUENCE.FIFTEEN_MIN, FREQUENCE.THIRTY_MIN,
                           FREQUENCE.SIXTY_MIN]:
            if source == DATASOURCE.AUTO:
                try:
                    # Same single-index normalization as the daily branch.
                    res = QAQueryAdv.QA_fetch_stock_min_adv(
                        code, start, end,
                        frequence=frequence).data.reset_index(level='code')
                    start_date = res.index[-1]
                    end_date = pd.Timestamp(end)
                    if end_date > start_date:
                        # Top up from TDX; nothing is written back.
                        data_tdx = QATdx.QA_fetch_get_stock_min(
                            code,
                            QA_util_get_next_period(start_date, frequence),
                            end_date, frequence=frequence)
                        # Align the TDX frame with the database layout.
                        data_tdx = data_tdx.rename(
                            columns={"vol": "volume"}).drop(
                                ['date', 'datetime', 'date_stamp',
                                 'time_stamp'], axis=1)
                        data_tdx.index = pd.to_datetime(data_tdx.index)
                        res = pd.concat([res, data_tdx], sort=True)
                    # NOTE(review): minute data is wrapped in the *day*
                    # struct here; QA_DataStruct_Stock_min looks intended,
                    # but the wrapper is kept as-is to avoid changing the
                    # return type callers may rely on -- confirm upstream.
                    res = QA_DataStruct_Stock_day(
                        res.reset_index().set_index(['datetime', 'code']))
                except Exception:
                    res = None
            if source == DATASOURCE.MONGO:
                try:
                    res = QAQueryAdv.QA_fetch_stock_min_adv(
                        code,
                        start,
                        end,
                        frequence=frequence
                    )
                except Exception:
                    res = None
            if source == DATASOURCE.TDX or res is None:
                res = QATdx.QA_fetch_get_stock_min(
                    code,
                    start,
                    end,
                    frequence=frequence
                )
                res = QA_DataStruct_Stock_min(
                    res.set_index(['datetime',
                                   'code'])
                )

    elif market == MARKET_TYPE.FUTURE_CN:
        if frequence == FREQUENCE.DAY:
            if source == DATASOURCE.MONGO:
                try:
                    res = QAQueryAdv.QA_fetch_future_day_adv(code, start, end)
                except Exception:
                    res = None
            if source == DATASOURCE.TDX or res is None:
                res = QATdx.QA_fetch_get_future_day(code, start, end)
                res = QA_DataStruct_Future_day(
                    res.set_index(['date', 'code']))
        elif frequence in [FREQUENCE.ONE_MIN,
                           FREQUENCE.FIVE_MIN,
                           FREQUENCE.FIFTEEN_MIN,
                           FREQUENCE.THIRTY_MIN,
                           FREQUENCE.SIXTY_MIN]:
            if source == DATASOURCE.MONGO:
                try:
                    res = QAQueryAdv.QA_fetch_future_min_adv(
                        code,
                        start,
                        end,
                        frequence=frequence
                    )
                except Exception:
                    res = None
            if source == DATASOURCE.TDX or res is None:
                res = QATdx.QA_fetch_get_future_min(
                    code,
                    start,
                    end,
                    frequence=frequence
                )
                res = QA_DataStruct_Future_min(
                    res.set_index(['datetime',
                                   'code'])
                )

    elif market == MARKET_TYPE.INDEX_CN:
        if frequence == FREQUENCE.DAY:
            if source == DATASOURCE.MONGO:
                try:
                    res = QAQueryAdv.QA_fetch_index_day_adv(code, start, end)
                except Exception:
                    # BUGFIX/consistency: this branch used to `return None`,
                    # which skipped the TDX fallback every sibling branch
                    # performs; fall through like the others instead.
                    res = None
            if source == DATASOURCE.TDX or res is None:
                res = QATdx.QA_fetch_get_index_day(code, start, end)
                res = QA_DataStruct_Index_day(res.set_index(['date', 'code']))
        elif frequence in [FREQUENCE.ONE_MIN,
                           FREQUENCE.FIVE_MIN,
                           FREQUENCE.FIFTEEN_MIN,
                           FREQUENCE.THIRTY_MIN,
                           FREQUENCE.SIXTY_MIN]:
            if source == DATASOURCE.MONGO:
                try:
                    res = QAQueryAdv.QA_fetch_index_min_adv(
                        code,
                        start,
                        end,
                        frequence=frequence
                    )
                except Exception:
                    res = None
            if source == DATASOURCE.TDX or res is None:
                res = QATdx.QA_fetch_get_index_min(
                    code,
                    start,
                    end,
                    frequence=frequence
                )
                res = QA_DataStruct_Index_min(
                    res.set_index(['datetime',
                                   'code'])
                )

    elif market == MARKET_TYPE.OPTION_CN:
        if source == DATASOURCE.MONGO:
            #res = QAQueryAdv.QA_fetch_option_day_adv(code, start, end)
            raise NotImplementedError('CURRENT NOT FINISH THIS METHOD')

    if output is OUTPUT_FORMAT.DATAFRAME:
        return res.data
    elif output is OUTPUT_FORMAT.DATASTRUCT:
        return res
    elif output is OUTPUT_FORMAT.NDARRAY:
        return res.to_numpy()
    elif output is OUTPUT_FORMAT.JSON:
        return res.to_json()
    elif output is OUTPUT_FORMAT.LIST:
        return res.to_list()
 def _checkQA_fetch_stock_xdxr(self, codes):
     """Check that stock_day and stock_adj hold matching record counts.

     After xdxr has been saved, the xdxr-adjusted data and the daily-bar
     data should have the same number of records per code. When they
     disagree, the stock_day and stock_adj documents for that code are
     deleted so they can be re-saved.
     NOTE: the root cause of the mismatches has not been identified yet;
     observed mismatches all dated to the 1990s (pre-2000 mismatches are
     tolerated below).
     """
     data1 = QA.QA_fetch_stock_xdxr(codes)
     # Assertion message: "data not saved".
     self.assertTrue(len(data1) >= 0, "未保存数据")
     if len(data1) > 0:
         print(set(data1['code']))
         # After saving xdxr, the xdxr and daily-bar counts should match.
         start = '1990-01-01'
         end_time = str(now_time())[0:10]
         for code in codes:
             try:
                 data = qa.QA_fetch_stock_day_adv(code, start,
                                                  end_time).data
             except Exception as e:
                 # Message: "no local data for {code}".
                 # NOTE(review): `data` stays unbound when this fails; the
                 # next try-block then raises NameError, which its except
                 # clause treats as "skip" -- deliberate but fragile.
                 print("{} 本地无数据".format(code))
             try:
                 dataAdj = qa.QA_fetch_stock_adj(code, start, end_time)
                 df1 = pd.DataFrame(data.index.levels[0])
                 df2 = pd.DataFrame(dataAdj.index)
                 # df1['a'] = df2['a'] = 1
                 # Rows appearing in only one frame survive the
                 # drop_duplicates(keep=False) below.
                 df = pd.concat([df1, df2]).drop_duplicates(keep=False)
                 if len(df) > 0:
                     # Message: "mismatch with source data: {code}".
                     print("和原始数据对不上:{}".format(code), df)
                     df = df[df['date'] > '2000-01-01']
                     if len(df) == 0:
                         # All mismatches predate 2000-01-01: tolerated.
                         continue
                     # Genuine mismatch: delete stock_day and stock_adj so
                     # they can be re-saved.
                     table = DATABASE.stock_adj
                     self._delTableDocument(table, code)
                     table = DATABASE.stock_day
                     self._delTableDocument(table, code)
                 else:
                     print("{} ok {}".format(code, len(data)))
                     # Assertion message: "last dates mismatch -- was xdxr
                     # not saved?".
                     self.assertTrue(
                         data.iloc[-1].name[0] == dataAdj.iloc[-1].date,
                         "最后日期不匹配,是否未保存xdxr?")
             except Exception as e:
                 # stock_adj has no data (or `data` was unbound, see above).
                 # Message: "skipping {code}".
                 print("跳过 {}".format(code))
Example #6
    def _saving_work(self, code):
        """Save xdxr (dividend/split) records and forward-adjusted ('qfq')
        factors for ``code``.

        First block: incremental insert of xdxr rows into the
        ``stock_xdxr`` collection. Second block: recompute qfq adjustment
        factors from the daily bars plus xdxr and reconcile them with the
        ``stock_adj`` collection, re-saving everything when the dividend
        data changed.
        """
        def __QA_log_info(code, end_time, start_time):
            def loginfo(prefix='', astr='', listCounts=5):
                # Buffer up to `listCounts` messages; flush them as one
                # combined log line once the buffer is full.
                if len(self._loginfolist) < listCounts:
                    self._loginfolist.append(astr)
                else:
                    # NOTE(review): `str` shadows the builtin here --
                    # harmless locally, but worth renaming.
                    str = ''
                    for i in range(len(self._loginfolist)):
                        str += self._loginfolist[i] + ' '
                    str += astr
                    QA_util_log_info(prefix.format(str), self.ui_log)
                    self._loginfolist.clear()

            prefix = '##JOB02 Saving parallelism {}_DAY ==== Trying updating\n{}'.format(
                "xdxr", '{}')
            loginfo(prefix, ' {} from {} to {}'.format(code, start_time,
                                                       end_time))

        try:
            search_cond = {'code': str(code)[0:6]}
            _col = get_coll(client=None,
                            cacheName="stock_xdxr",
                            tableName="stock_xdxr")
            ref_ = _col.find(search_cond)
            ref_count = _col.count_documents(search_cond)

            end_time = str(now_time())[0:10]
            _xdxr = QA_fetch_get_stock_xdxr(str(code))
            if ref_count > 0:
                # Rows exist already: resume from the last saved date.
                start_time = ref_[ref_count - 1]['date']

                __QA_log_info(code, end_time, start_time)

                if start_time != end_time:
                    # Only the rows newer than the last saved date.
                    xdxrdata = _xdxr[_xdxr['date'] > start_time]
                    if len(xdxrdata) > 0:
                        _col.insert_many(QA_util_to_json_from_pandas(xdxrdata),
                                         ordered=False)
            else:
                # First-time save: insert the full history.
                try:
                    start_time = '1990-01-01'
                    __QA_log_info(code, end_time, start_time)
                    _col.insert_many(QA_util_to_json_from_pandas(_xdxr),
                                     ordered=False)
                except Exception as e:
                    pass
        except Exception as e:
            QA_util_log_info(e.args, ui_log=self.ui_log)
            self.err.append(str(code))
            QA_util_log_info(self.err, ui_log=self.ui_log)

        try:
            # Insert forward-adjusted (qfq) factor data.
            # NOTE(review): `search_cond`, `end_time` and `_xdxr` used below
            # are bound inside the previous try-block; if that block failed
            # before binding them, this block raises NameError, which the
            # outer except only prints.
            coll_adj = get_coll(client=None,
                                cacheName="stock_adj",
                                tableName="stock_adj")
            data = QA_fetch_stock_day(str(code), '1990-01-01',
                                      str(datetime.date.today()), 'pd')
            qfq = _QA_data_stock_to_fq(data, _xdxr, 'qfq')
            qfq = qfq.assign(date=qfq.date.apply(lambda x: str(x)[0:10]))
            adjdata = QA_util_to_json_from_pandas(
                qfq.loc[:, ['date', 'code', 'adj']])
            # Reconcile with what is already stored; append only new rows.
            ref_ = coll_adj.find(search_cond)
            ref_count = coll_adj.count_documents(search_cond)
            if ref_count > 0:
                # Data was saved previously.
                lastref = ref_[ref_count - 1]
                del lastref['_id']
                try:
                    # If the last stored record still matches the recomputed
                    # data, append only the rows after it. list.index raises
                    # ValueError when the record is absent (deliberate
                    # exception-as-control-flow).
                    adjdata.index(lastref)
                    start_time = lastref['date']
                    adjdata2 = QA_util_to_json_from_pandas(qfq.loc[slice(
                        pd.Timestamp(start_time), pd.Timestamp(
                            end_time))].loc[:, ['date', 'code', 'adj']][1:])
                    if (ref_count + len(adjdata2)) == len(adjdata):
                        # Counts reconcile: safe to append the increment.
                        if len(adjdata2) > 0:
                            coll_adj.insert_many(adjdata2)
                    else:
                        # Exception message: "record totals mismatch,
                        # re-store".
                        raise Exception("数据总量不匹配,重新存储")
                except Exception as e:
                    # Dividend data changed: drop the previously saved rows
                    # and store the recomputed data in full.
                    coll_adj.delete_many({'code': code})
                    coll_adj.insert_many(adjdata)
            else:
                # First-time save of the qfq data.
                # print(adjdata)
                coll_adj.insert_many(adjdata)

        except Exception as e:
            print(e)