Ejemplo n.º 1
0
def QA_fetch_future_day(code,
                        start,
                        end,
                        format='numpy',
                        collections=DATABASE.future_day):
    """Query ``collections`` for records of ``code`` between ``start`` and ``end``.

    Arguments:
        code -- a code or list of codes; normalised with
            ``QA_util_code_tolist(code, auto_fill=False)``
        start -- lower bound; only the first 10 characters of ``str(start)`` are used
        end -- upper bound; only the first 10 characters of ``str(end)`` are used

    Keyword Arguments:
        format -- output format handed to ``QA_util_to_anyformat_from_pandas``
        collections -- collection to query (default: ``DATABASE.future_day``)

    Returns:
        Whatever ``QA_util_to_anyformat_from_pandas`` produces, or ``None``
        (after logging) when either date fails validation.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]
    code = QA_util_code_tolist(code, auto_fill=False)

    # Both validators are always called; the results are combined with ``&``.
    start_ok = QA_util_dateordatetime_valid(start)
    end_ok = QA_util_dateordatetime_valid(end)
    if not (start_ok & end_ok):
        QA_util_log_info(
            'QA Error QA_fetch_future_day data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    stamp_range = {
        "$lte": QA_util_date_stamp(end),
        "$gte": QA_util_date_stamp(start)
    }
    cursor = collections.find(
        {'code': {'$in': code}, "date_stamp": stamp_range},
        {"_id": 0},
        batch_size=10000)
    frame = pd.DataFrame(list(cursor))

    # Data processing: the shared filter does sanity checks, sets the index
    # and selects the important columns without changing the format.
    frame = __QA_fetch_query_filter(
        frame, DATA_QUERY_INDEX_COLUMNS_UNIQUE.FUTURE_DAY, query=None)

    # Output formatting.
    return QA_util_to_anyformat_from_pandas(data=frame, format=format)
Ejemplo n.º 2
0
async def QA_fetch_stock_day(code,
                             start,
                             end,
                             format='numpy',
                             frequence='day',
                             collections=DATABASE_ASYNC.stock_day):
    """Asynchronously query ``collections`` for records of ``code`` by date.

    Arguments:
        code -- a code or list of codes; normalised with ``QA_util_code_tolist``
        start -- lower bound; only the first 10 characters of ``str(start)`` are used
        end -- upper bound; only the first 10 characters of ``str(end)`` are used

    Keyword Arguments:
        format -- one of ``P/p/pandas/pd``, ``json/dict``, ``n/N/numpy``,
            ``list/l/L`` (default: ``'numpy'``)
        frequence -- accepted for interface compatibility; not used here
        collections -- async collection to query
            (default: ``DATABASE_ASYNC.stock_day``)

    Returns:
        The data in the requested format. ``None`` is returned when ``end``
        is not a valid date or ``format`` is unknown; when post-processing
        fails the frame is replaced by ``None`` before conversion.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]

    # code checking
    code = QA_util_code_tolist(code)

    if not QA_util_date_valid(end):
        QA_util_log_info(
            'QA Error QA_fetch_stock_day data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    cursor = collections.find({
        'code': {
            '$in': code
        },
        "date_stamp": {
            "$lte": QA_util_date_stamp(end),
            "$gte": QA_util_date_stamp(start)
        }
    })
    # The old ``except SyntaxError`` wrapper was dead code: a SyntaxError is
    # raised when the module is compiled, never while iterating the cursor.
    res = pd.DataFrame([item async for item in cursor])
    try:
        res = res.drop(
            '_id', axis=1).assign(volume=res.vol).query('volume>1').assign(
                date=pd.to_datetime(res.date)).drop_duplicates(
                    (['date', 'code'])).set_index('date', drop=False)
        # ``DataFrame.ix`` was removed in pandas 1.0, which made this branch
        # always fail and return None.  ``reindex`` keeps the old semantics
        # (a missing column becomes NaN instead of raising).
        res = res.reindex(columns=[
            'code', 'open', 'high', 'low', 'close', 'volume', 'amount',
            'date'
        ])
    except Exception:
        # Best effort: an empty or malformed result set yields None.
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    # alternative output formats
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        print(
            "QA Error QA_fetch_stock_day format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
            % format)
        return None
Ejemplo n.º 3
0
def QA_fetch_index_day(code, start, end, format='numpy', collections=DATABASE.index_day):
    """Query ``collections`` for records of ``code`` between ``start`` and ``end``.

    Arguments:
        code -- a code or list of codes; normalised with ``QA_util_code_tolist``
        start -- lower bound; only the first 10 characters of ``str(start)`` are used
        end -- upper bound; only the first 10 characters of ``str(end)`` are used

    Keyword Arguments:
        format -- ``dict/json`` returns the raw documents; ``n/N/numpy`` an
            array; ``P/p/pandas/pd`` a DataFrame indexed by ``date``; any
            other value returns the plain list of rows
        collections -- collection to query (default: ``DATABASE.index_day``)

    Returns:
        The data in the requested format, or ``None`` (after logging) when
        ``end`` does not validate.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]
    code = QA_util_code_tolist(code)

    if QA_util_date_valid(end) != True:
        QA_util_log_info('something wrong with date')
        return None

    stamp_range = {
        "$lte": QA_util_date_stamp(end),
        "$gte": QA_util_date_stamp(start)
    }
    cursor = collections.find({'code': {'$in': code}, "date_stamp": stamp_range})

    if format in ('dict', 'json'):
        return list(cursor)

    # One row per document: code, OHLC, volume (stored as 'vol'), date.
    rows = [
        [
            str(doc['code']),
            float(doc['open']),
            float(doc['high']),
            float(doc['low']),
            float(doc['close']),
            float(doc['vol']),
            doc['date'],
        ]
        for doc in cursor
    ]

    if format in ('n', 'N', 'numpy'):
        return numpy.asarray(rows)
    if format in ('P', 'p', 'pandas', 'pd'):
        frame = DataFrame(
            rows,
            columns=['code', 'open', 'high', 'low', 'close', 'volume', 'date'])
        frame['date'] = pd.to_datetime(frame['date'])
        return frame.set_index('date', drop=False)
    # 'list' / 'l' / 'L' and any unrecognised format get the plain row list.
    return rows
    def QA_count_eastmoney_stock_xjlc_record_count_one_by_one(
            self, str_stock_code, collections=DATABASE.eastmoney_stock_zjlx):
        """Count the stored records for a code and report their date span.

        Arguments:
            str_stock_code -- code (or codes) to look up; normalised with
                ``QA_util_code_tolist``

        Keyword Arguments:
            collections -- collection to query
                (default: ``DATABASE.eastmoney_stock_zjlx``)

        Returns:
            list -- ``[str_stock_code, record count, first date, last date]``;
            both dates are ``None`` when no record matches.
        """
        codes = QA_util_code_tolist(str_stock_code)
        cursor = collections.find({'stock_code': {'$in': codes}}).sort('date')

        total = cursor.count()

        # The cursor is sorted by 'date', so its two ends bound the span.
        first_rec = last_rec = None
        if total > 0:
            first_rec = cursor[0]
            last_rec = cursor[total - 1]

        first_date = first_rec['date'] if first_rec is not None else None
        last_date = last_rec['date'] if last_rec is not None else None

        return [str_stock_code, total, first_date, last_date]
Ejemplo n.º 5
0
def QA_fetch_stock_min(code,
                       start,
                       end,
                       format='numpy',
                       frequence='1min',
                       collections=DATABASE.stock_min):
    """Query ``collections`` for records of ``code`` in a time window.

    Arguments:
        code -- a code or list of codes; normalised with ``QA_util_code_tolist``
        start -- window start, converted with ``QA_util_time_stamp``
        end -- window end, converted with ``QA_util_time_stamp``

    Keyword Arguments:
        format -- one of ``P/p/pandas/pd``, ``json/dict``, ``n/N/numpy``,
            ``list/l/L`` (default: ``'numpy'``)
        frequence -- ``1min/1m``, ``5min/5m``, ``15min/15m``, ``30min/30m``
            or ``60min/60m``; any other value is reported with ``print`` and
            then used unchanged in the query
        collections -- collection to query (default: ``DATABASE.stock_min``)

    Returns:
        The data in the requested format; ``None`` for an unknown format.
        When post-processing fails the frame is replaced by ``None`` before
        conversion.
    """
    frequence_aliases = (
        ('1min', '1m'),
        ('5min', '5m'),
        ('15min', '15m'),
        ('30min', '30m'),
        ('60min', '60m'),
    )
    for aliases in frequence_aliases:
        if frequence in aliases:
            frequence = aliases[0]
            break
    else:
        print(
            "💢 Error QA_fetch_stock_min parameter frequence=%s is none of 1min 1m 5min 5m 15min 15m 30min 30m 60min 60m"
            % frequence)

    # code checking
    code = QA_util_code_tolist(code)

    time_window = {
        "$gte": QA_util_time_stamp(start),
        "$lte": QA_util_time_stamp(end)
    }
    cursor = collections.find({
        'code': {'$in': code},
        "time_stamp": time_window,
        'type': frequence
    })

    raw = pd.DataFrame(list(cursor))
    try:
        # Expose 'vol' as 'volume' and keep only rows with volume > 1.
        traded = raw.drop('_id', axis=1).assign(volume=raw.vol).query('volume>1')
        stamped = traded.assign(datetime=pd.to_datetime(raw.datetime))
        res = stamped.drop_duplicates(['datetime', 'code']).set_index(
            'datetime', drop=False)
    except:
        res = None

    if format in ('P', 'p', 'pandas', 'pd'):
        return res
    if format in ('json', 'dict'):
        return QA_util_to_json_from_pandas(res)
    # alternative output formats
    if format in ('n', 'N', 'numpy'):
        return numpy.asarray(res)
    if format in ('list', 'l', 'L'):
        return numpy.asarray(res).tolist()

    print(
        "💢 Error QA_fetch_stock_min format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
        % format)
    return None
Ejemplo n.º 6
0
def QA_fetch_index_day(code, start, end, format='numpy', collections=DATABASE.index_day):
    """Query ``collections`` for records of ``code`` between ``start`` and ``end``.

    Arguments:
        code -- a code or list of codes; normalised with ``QA_util_code_tolist``
        start -- lower bound; only the first 10 characters of ``str(start)`` are used
        end -- upper bound; only the first 10 characters of ``str(end)`` are used

    Keyword Arguments:
        format -- one of ``P/p/pandas/pd``, ``json/dict``, ``n/N/numpy``,
            ``list/l/L`` (default: ``'numpy'``)
        collections -- collection to query (default: ``DATABASE.index_day``)

    Returns:
        The data in the requested format; ``None`` when ``end`` does not
        validate or the format is unknown.  When post-processing fails the
        frame is replaced by ``None`` before conversion.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]
    code = QA_util_code_tolist(code)

    if QA_util_date_valid(end) != True:
        QA_util_log_info(
            'QA Error QA_fetch_index_day data parameter start=%s end=%s is not right' % (start, end))
        return None

    stamp_range = {
        "$lte": QA_util_date_stamp(end),
        "$gte": QA_util_date_stamp(start)
    }
    cursor = collections.find(
        {'code': {'$in': code}, "date_stamp": stamp_range},
        {"_id": 0},
        batch_size=10000)

    raw = pd.DataFrame(list(cursor))
    try:
        # Expose 'vol' as 'volume' and parse 'date' in a single assign.
        enriched = raw.assign(volume=raw.vol, date=pd.to_datetime(raw.date))
        res = enriched.drop_duplicates(['date', 'code']).set_index(
            'date', drop=False)
    except:
        res = None

    if format in ('P', 'p', 'pandas', 'pd'):
        return res
    if format in ('json', 'dict'):
        return QA_util_to_json_from_pandas(res)
    # alternative output formats
    if format in ('n', 'N', 'numpy'):
        return numpy.asarray(res)
    if format in ('list', 'l', 'L'):
        return numpy.asarray(res).tolist()

    print("QA Error QA_fetch_index_day format parameter %s is none of  \"P, p, pandas, pd , n, N, numpy !\" " % format)
    return None
Ejemplo n.º 7
0
def QA_fetch_ctp_tick(code, start, end, frequence, format='pd', collections=DATABASE.ctp_tick):
    """Query stored CTP ticks and return them indexed by a parsed datetime.

    Intended only for ticks that were stored from CTP.

    Arguments:
        code -- instrument id or list of ids; normalised with
            ``QA_util_code_tolist(code, auto_fill=False)``
        start -- window start, converted with ``QA_util_time_stamp``
        end -- window end, converted with ``QA_util_time_stamp``
        frequence -- value matched against the documents' ``type`` field

    Keyword Arguments:
        format -- accepted for interface compatibility; not used here
        collections -- collection to query (default: ``DATABASE.ctp_tick``)

    Returns:
        DataFrame -- the selected tick columns plus ``datetime`` and ``code``,
        indexed by ``datetime``.
    """
    code = QA_util_code_tolist(code, auto_fill=False)
    time_window = {
        "$gte": QA_util_time_stamp(start),
        "$lte": QA_util_time_stamp(end)
    }
    cursor = collections.find(
        {'InstrumentID': {'$in': code}, "time_stamp": time_window, 'type': frequence},
        {"_id": 0},
        batch_size=10000)

    # 1.7976931348623157e+308 (the largest finite double) and empty strings
    # are treated as missing; any column containing a missing value is dropped.
    hq = pd.DataFrame(list(cursor))
    hq = hq.replace(1.7976931348623157e+308, numpy.nan)
    hq = hq.replace('', numpy.nan)
    hq = hq.dropna(axis=1)

    tick_columns = [
        'ActionDay', 'AskPrice1', 'AskVolume1', 'AveragePrice', 'BidPrice1',
        'BidVolume1', 'HighestPrice', 'InstrumentID', 'LastPrice',
        'OpenInterest', 'TradingDay', 'UpdateMillisec',
        'UpdateTime', 'Volume'
    ]
    p1 = hq.loc[:, tick_columns]

    # Build "<day> <time><.fraction>" text, then parse it.
    # NOTE(review): UpdateMillisec is divided by 1000000, not 1000 - confirm
    # that this scaling is intended.
    stamp_text = p1.ActionDay.apply(QA_util_date_int2str) + ' ' + p1.UpdateTime
    fraction_text = (p1.UpdateMillisec / 1000000).apply(
        lambda x: str('%.6f' % x)[1:])
    p1 = p1.assign(datetime=stamp_text + fraction_text, code=p1.InstrumentID)
    p1.datetime = pd.to_datetime(p1.datetime)
    return p1.set_index(p1.datetime)
Ejemplo n.º 8
0
def QA_fetch_future_day(code, start, end, format='numpy', collections=DATABASE.future_day):
    """Query ``collections`` for records of ``code`` between ``start`` and ``end``.

    Arguments:
        code -- a code or list of codes; normalised with
            ``QA_util_code_tolist(code, auto_fill=False)``
        start -- lower bound; only the first 10 characters of ``str(start)`` are used
        end -- upper bound; only the first 10 characters of ``str(end)`` are used

    Keyword Arguments:
        format -- ``dict/json`` returns the raw documents; ``n/N/numpy`` an
            array; ``list/l/L`` the row list; ``P/p/pandas/pd`` a
            de-duplicated DataFrame indexed by ``date``
        collections -- collection to query (default: ``DATABASE.future_day``)

    Returns:
        The data in the requested format.  An unknown format is reported with
        ``print`` and the plain row list is returned.  ``None`` (after
        logging) when ``end`` does not validate.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]
    code = QA_util_code_tolist(code, auto_fill=False)

    if QA_util_date_valid(end) != True:
        QA_util_log_info('QA something wrong with date')
        return None

    stamp_range = {
        "$lte": QA_util_date_stamp(end),
        "$gte": QA_util_date_stamp(start)
    }
    cursor = collections.find(
        {'code': {'$in': code}, "date_stamp": stamp_range},
        {"_id": 0},
        batch_size=10000)

    if format in ('dict', 'json'):
        return list(cursor)

    rows = [
        [
            str(doc['code']),
            float(doc['open']),
            float(doc['high']),
            float(doc['low']),
            float(doc['close']),
            float(doc['position']),
            float(doc['price']),
            float(doc['trade']),
            doc['date'],
        ]
        for doc in cursor
    ]

    # alternative output formats
    if format in ('n', 'N', 'numpy'):
        return numpy.asarray(rows)
    if format in ('list', 'l', 'L'):
        return rows
    if format in ('P', 'p', 'pandas', 'pd'):
        frame = DataFrame(
            rows,
            columns=['code', 'open', 'high', 'low', 'close', 'position', 'price', 'trade', 'date'])
        frame = frame.drop_duplicates()
        frame['date'] = pd.to_datetime(frame['date'])
        return frame.set_index('date', drop=False)

    print("QA Error QA_fetch_future_day format parameter %s is none of  \"P, p, pandas, pd , n, N, numpy !\" " % format)
    return rows
Ejemplo n.º 9
0
def QA_fetch_stock_xdxr(code, format='pd', collections=DATABASE.stock_xdxr):
    """Fetch ex-rights / ex-dividend records for ``code`` from the database.

    Arguments:
        code -- a code or list of codes; normalised with ``QA_util_code_tolist``

    Keyword Arguments:
        format -- accepted for interface compatibility; not used here
        collections -- collection to query (default: ``DATABASE.stock_xdxr``)

    Returns:
        DataFrame -- the matching documents without ``_id``, with ``date``
        parsed to datetime and also used as the index.
    """
    code = QA_util_code_tolist(code)
    cursor = collections.find({'code': {'$in': code}}, batch_size=10000)
    records = pd.DataFrame(list(cursor))
    data = records.drop(['_id'], axis=1)
    data['date'] = pd.to_datetime(data['date'])
    return data.set_index('date', drop=False)
Ejemplo n.º 10
0
def _QA_fetch_stock_adj(code,
                        start,
                        end,
                        format='pd',
                        collections=DATABASE.stock_adj):
    """Fetch stock adjustment factors (ADJ) for ``code`` between two dates.

    Arguments:
        code -- a code or list of codes; normalised with ``QA_util_code_tolist``
        start -- lower bound; only the first 10 characters of ``str(start)`` are used
        end -- upper bound; only the first 10 characters of ``str(end)`` are used

    Keyword Arguments:
        format -- accepted for interface compatibility; not used here
        collections -- collection to query (default: ``DATABASE.stock_adj``)

    Returns:
        DataFrame indexed by the parsed ``date`` column, or ``None`` when
        ``end`` does not validate.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]

    # code checking
    code = QA_util_code_tolist(code)

    if not QA_util_date_valid(end):
        return None

    # 'date' is compared against the truncated start/end strings directly.
    selector = {
        'code': {'$in': code},
        "date": {"$lte": end, "$gte": start}
    }
    cursor = collections.find(selector, {"_id": 0}, batch_size=10000)

    res = pd.DataFrame(list(cursor))
    res.date = pd.to_datetime(res.date)
    return res.set_index('date', drop=False)
Ejemplo n.º 11
0
def QA_SU_save_financial_report_day(code=None,
                                    client=DATABASE,
                                    ui_log=None,
                                    ui_progress=None):
    """Download WY financial reports and insert them into ``stock_financial_wy``.

    Saves the full available history for every requested code.

    :param code: a code or list of codes; when ``None`` the list comes from
        ``QA_fetch_financial_code()``
    :param client: database handle providing ``stock_financial_wy``
    :param ui_log: forwarded to ``QA_util_log_info``
    :param ui_progress: forwarded to ``QA_util_log_info``
    :return: None
    """
    # Codes whose download or insert raised; reported once at the end.
    err = []

    def __saving_work(code, stock_financial):
        try:
            QA_util_log_info(
                '##JOB01 Now Saving WY financial_report==== {}'.format(
                    str(code)), ui_log)

            stock_financial.insert_many(QA_util_to_json_from_pandas(
                QA_fetch_get_stock_report_wy(code)),
                                        ordered=False)
            gc.collect()
        except Exception as error0:
            # Best effort: record the failing code and carry on with the rest.
            print(error0)
            err.append(str(code))

    if code is None:
        code = QA_fetch_financial_code()
    else:
        code = QA_util_code_tolist(code)

    if code is None:
        QA_util_log_info(' No report send \n ', ui_log)
        return

    stock_financial = client.stock_financial_wy
    stock_financial.create_index([("code", pymongo.ASCENDING),
                                  ("report_date", pymongo.ASCENDING)],
                                 unique=True)

    total = len(code)
    # enumerate() replaces code.index(item): that lookup was O(n) on every
    # iteration and reported the first occurrence for duplicated codes.
    for position, item in enumerate(code, start=1):
        QA_util_log_info('The {} of Total {}'.format(position, total))

        percent = position / total * 100
        strProgressToLog = 'DOWNLOAD PROGRESS {}'.format(
            str(float(percent))[0:4] + '%')
        intProgressToLog = int(float(percent))
        QA_util_log_info(strProgressToLog,
                         ui_log=ui_log,
                         ui_progress=ui_progress,
                         ui_progress_int_value=intProgressToLog)

        __saving_work(item, stock_financial)

    if len(err) < 1:
        QA_util_log_info('SUCCESS save WY financial_report ^_^', ui_log)
    else:
        QA_util_log_info(' ERROR CODE \n ', ui_log)
        QA_util_log_info(err, ui_log)
Ejemplo n.º 12
0
def QA_fetch_stock_technical_index(code,
                                   start,
                                   end=None,
                                   type='day',
                                   format='pd'):
    """Query a technical-index collection for ``code`` between two dates.

    Arguments:
        code -- a code or list of codes; normalised with ``QA_util_code_tolist``
        start -- lower bound, converted with ``QA_util_date_stamp``

    Keyword Arguments:
        end -- upper bound, converted with ``QA_util_date_stamp``; must pass
            ``QA_util_date_valid``
        type -- ``'day'``, ``'week'`` or ``'month'``; selects
            ``DATABASE.stock_technical_index`` / ``_week`` / ``_month``
        format -- one of ``P/p/pandas/pd``, ``json/dict``, ``n/N/numpy``,
            ``list/l/L`` (default: ``'pd'``)

    Returns:
        The data in the requested format (a DataFrame is indexed by
        ``['date', 'code']``).  ``None`` when ``type`` or ``format`` is
        unknown or ``end`` does not validate.  When post-processing fails
        the frame is replaced by ``None`` before conversion.
    """
    if type == 'day':
        collections = DATABASE.stock_technical_index
    elif type == 'week':
        collections = DATABASE.stock_technical_week
    elif type == 'month':
        collections = DATABASE.stock_technical_month
    else:
        print("type should be in ['day', 'week', 'month']")
        # Without this return the query below failed with UnboundLocalError
        # because ``collections`` was never assigned.
        return None

    # code checking
    code = QA_util_code_tolist(code)

    if not QA_util_date_valid(end):
        QA_util_log_info(
            'QA Error QA_fetch_stock_technical_index data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    cursor = collections.find(
        {
            'code': {
                '$in': code
            },
            "date_stamp": {
                "$lte": QA_util_date_stamp(end),
                "$gte": QA_util_date_stamp(start)
            }
        }, {"_id": 0},
        batch_size=10000)

    res = pd.DataFrame([item for item in cursor])
    try:
        res = res.drop_duplicates((['code', 'date']))
        # Keep only the first 10 characters of the date text.
        res['date'] = res['date'].apply(lambda x: str(x)[0:10])
        res = res.drop(['date_stamp'], axis=1).set_index(['date', 'code'])
    except Exception:
        # Best effort: an empty or malformed result set yields None.
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    # alternative output formats
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        print(
            "QA Error QA_fetch_stock_technical_index format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
            % format)
        return None
Ejemplo n.º 13
0
def QA_fetch_index_min(code,
                       start,
                       end,
                       format='numpy',
                       frequence='1min',
                       collections=DATABASE.index_min):
    """Query ``collections`` for records of ``code`` in a time window.

    Arguments:
        code -- a code or list of codes; normalised with ``QA_util_code_tolist``
        start -- window start, converted with ``QA_util_time_stamp``
        end -- window end, converted with ``QA_util_time_stamp``

    Keyword Arguments:
        format -- ``dict/json`` returns the raw documents; ``numpy/np/n`` an
            array; ``list/l/L`` a nested list; ``P/p/pandas/pd`` a DataFrame
            indexed by ``datetime``
        frequence -- ``1min/1m``, ``5min/5m``, ``15min/15m``, ``30min/30m``
            or ``60min/60m``; other values are used unchanged in the query
        collections -- collection to query (default: ``DATABASE.index_min``)

    Returns:
        The data in the requested format; ``None`` for an unknown format.
    """
    frequence_aliases = (
        ('1min', '1m'),
        ('5min', '5m'),
        ('15min', '15m'),
        ('30min', '30m'),
        ('60min', '60m'),
    )
    for aliases in frequence_aliases:
        if frequence in aliases:
            frequence = aliases[0]
            break

    code = QA_util_code_tolist(code)
    time_window = {
        "$gte": QA_util_time_stamp(start),
        "$lte": QA_util_time_stamp(end)
    }
    cursor = collections.find({
        'code': {'$in': code},
        "time_stamp": time_window,
        'type': frequence
    })

    if format in ('dict', 'json'):
        return list(cursor)

    rows = [
        [
            str(doc['code']),
            float(doc['open']),
            float(doc['high']),
            float(doc['low']),
            float(doc['close']),
            float(doc['vol']),
            doc['datetime'],
            doc['time_stamp'],
            doc['date'],
        ]
        for doc in cursor
    ]
    frame = DataFrame(
        rows,
        columns=[
            'code', 'open', 'high', 'low', 'close', 'volume',
            'datetime', 'time_stamp', 'date'
        ])
    frame['datetime'] = pd.to_datetime(frame['datetime'])
    frame = frame.set_index('datetime', drop=False)

    if format in ('numpy', 'np', 'n'):
        return numpy.asarray(frame)
    if format in ('list', 'l', 'L'):
        return numpy.asarray(frame).tolist()
    if format in ('P', 'p', 'pandas', 'pd'):
        return frame
    return None
Ejemplo n.º 14
0
def QA_fetch_stock_divyield(code,
                            start,
                            end=None,
                            format='pd',
                            collections=DATABASE.stock_divyield):
    """Query ``collections`` for dividend records of ``code`` between two dates.

    Arguments:
        code -- a code or list of codes; normalised with
            ``QA_util_code_tolist`` and matched against ``a_stockcode``
        start -- lower bound compared directly against ``dir_dcl_date``

    Keyword Arguments:
        end -- upper bound compared directly against ``dir_dcl_date``; must
            pass ``QA_util_date_valid``
        format -- one of ``P/p/pandas/pd``, ``json/dict``, ``n/N/numpy``,
            ``list/l/L`` (default: ``'pd'``)
        collections -- collection to query
            (default: ``DATABASE.stock_divyield``)

    Returns:
        The data in the requested format; ``None`` when ``end`` does not
        validate or the format is unknown.  When post-processing fails the
        frame is replaced by ``None`` before conversion.
    """
    # code checking
    code = QA_util_code_tolist(code)

    if not QA_util_date_valid(end):
        QA_util_log_info(
            'QA Error QA_fetch_stock_divyield data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    cursor = collections.find(
        {
            'a_stockcode': {
                '$in': code
            },
            "dir_dcl_date": {
                "$lte": end,
                "$gte": start
            }
        },
        batch_size=10000)

    res = pd.DataFrame([item for item in cursor])
    try:
        res = res.drop('_id', axis=1).drop_duplicates(
            (['dir_dcl_date', 'a_stockcode']))
        # ``DataFrame.ix`` was removed in pandas 1.0, which made this branch
        # always fail and return None.  ``reindex`` keeps the old semantics
        # (a missing column becomes NaN instead of raising).
        res = res.reindex(columns=[
            'a_stockcode', 'a_stocksname', 'div_info', 'div_type_code',
            'bonus_shr', 'cash_bt', 'cap_shr', 'epsp', 'ps_cr', 'ps_up',
            'reg_date', 'dir_dcl_date', 'a_stockcode1', 'ex_divi_date',
            'prg'
        ])
    except Exception:
        # Best effort: an empty or malformed result set yields None.
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    # alternative output formats
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        print(
            "QA Error QA_fetch_stock_divyield format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
            % format)
        return None
Ejemplo n.º 15
0
def QA_fetch_stock_info(code, format='pd', collections=DATABASE.stock_info):
    """Fetch the ``stock_info`` documents for ``code`` as a DataFrame.

    Arguments:
        code -- a code or list of codes; normalised with ``QA_util_code_tolist``

    Keyword Arguments:
        format -- accepted for interface compatibility; not used here
        collections -- collection to query (default: ``DATABASE.stock_info``)

    Returns:
        DataFrame indexed by ``code`` (the column is kept), or ``None`` after
        logging the exception when the query or indexing fails.
    """
    code = QA_util_code_tolist(code)
    try:
        cursor = collections.find(
            {'code': {'$in': code}}, {"_id": 0}, batch_size=10000)
        indexed = pd.DataFrame(list(cursor)).set_index('code', drop=False)
    except Exception as e:
        QA_util_log_info(e)
        return None
    return indexed
Ejemplo n.º 16
0
def QA_fetch_stock_block(code=None, format='pd', collections=DATABASE.stock_block):
    """Fetch ``stock_block`` documents, optionally restricted to ``code``.

    Keyword Arguments:
        code -- a code or list of codes (normalised with
            ``QA_util_code_tolist``); ``None`` fetches the whole collection
        format -- accepted for interface compatibility; not used here
        collections -- collection to query (default: ``DATABASE.stock_block``)

    Returns:
        DataFrame -- the documents without ``_id``, indexed by ``code`` (the
        column is kept).
    """
    if code is None:
        cursor = collections.find()
    else:
        code = QA_util_code_tolist(code)
        cursor = collections.find({'code': {'$in': code}}, batch_size=10000)

    data = pd.DataFrame(list(cursor)).drop(['_id'], axis=1)
    return data.set_index('code', drop=False)
Ejemplo n.º 17
0
def QA_fetch_stock_financial_calendar(code,
                                      start,
                                      end=None,
                                      format='pd',
                                      collections=DATABASE.report_calendar):
    """Query ``collections`` for report-calendar records of ``code``.

    Arguments:
        code -- a code or list of codes; normalised with ``QA_util_code_tolist``
        start -- lower bound compared directly against ``real_date``

    Keyword Arguments:
        end -- upper bound compared directly against ``real_date``; must pass
            ``QA_util_date_valid``
        format -- one of ``P/p/pandas/pd``, ``json/dict``, ``n/N/numpy``,
            ``list/l/L`` (default: ``'pd'``)
        collections -- collection to query
            (default: ``DATABASE.report_calendar``)

    Returns:
        The data in the requested format; ``None`` when ``end`` does not
        validate or the format is unknown.  When post-processing fails the
        frame is replaced by ``None`` before conversion.
    """
    # code checking
    code = QA_util_code_tolist(code)

    if not QA_util_date_valid(end):
        QA_util_log_info(
            'QA Error QA_fetch_stock_financial_calendar data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    cursor = collections.find(
        {
            'code': {
                '$in': code
            },
            "real_date": {
                "$lte": end,
                "$gte": start
            }
        },
        batch_size=10000)

    res = pd.DataFrame([item for item in cursor])
    try:
        res = res.drop('_id', axis=1).drop_duplicates(
            (['report_date', 'code']))
        # ``DataFrame.ix`` was removed in pandas 1.0, which made this branch
        # always fail and return None.  ``reindex`` keeps the old semantics
        # (a missing column becomes NaN instead of raising).
        res = res.reindex(columns=[
            'code', 'name', 'pre_date', 'first_date', 'second_date',
            'third_date', 'real_date', 'codes', 'report_date', 'crawl_date'
        ])
    except Exception:
        # Best effort: an empty or malformed result set yields None.
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    # alternative output formats
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        print(
            "QA Error QA_fetch_stock_financial_calendar format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
            % format)
        return None
Ejemplo n.º 18
0
def QA_fetch_stock_min(code,
                       start,
                       end,
                       frequence='1min',
                       format='numpy',
                       collections=DATABASE.stock_min):
    """Query ``collections`` for records of ``code`` in a time window.

    Arguments:
        code -- a code or list of codes; normalised with ``QA_util_code_tolist``
        start -- window start; must pass ``QA_util_dateordatetime_valid``
        end -- window end; must pass ``QA_util_dateordatetime_valid``

    Keyword Arguments:
        frequence -- ``1min/1m``, ``5min/5m``, ``15min/15m``, ``30min/30m``
            or ``60min/60m``; any other value is logged and then used
            unchanged in the query
        format -- output format handed to ``QA_util_to_anyformat_from_pandas``
        collections -- collection to query (default: ``DATABASE.stock_min``)

    Returns:
        Whatever ``QA_util_to_anyformat_from_pandas`` produces, or ``None``
        (after logging) when either bound fails validation.
    """
    # Both validators are always called; the results are combined with ``&``.
    start_ok = QA_util_dateordatetime_valid(start)
    end_ok = QA_util_dateordatetime_valid(end)
    if not (start_ok & end_ok):
        QA_util_log_info(
            'QA Error QA_fetch_stock_min data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    # Data retrieval.
    if frequence in ['1min', '1m']:
        frequence = '1min'
    elif frequence in ['5min', '5m']:
        frequence = '5min'
    elif frequence in ['15min', '15m']:
        frequence = '15min'
    elif frequence in ['30min', '30m']:
        frequence = '30min'
    elif frequence in ['60min', '60m']:
        frequence = '60min'
    else:
        QA_util_log_info(
            "QA Error QA_fetch_stock_min parameter frequence=%s is none of 1min 1m 5min 5m 15min 15m 30min 30m 60min 60m"
            % frequence)

    # code checking
    code = QA_util_code_tolist(code)

    cursor = collections.find(
        {
            'code': {
                '$in': code
            },
            "time_stamp": {
                "$gte": QA_util_time_stamp(start),
                "$lte": QA_util_time_stamp(end)
            },
            'type': frequence
        }, {"_id": 0},
        batch_size=10000)

    res = pd.DataFrame([item for item in cursor])
    # Data processing: the shared filter does sanity checks, sets the index
    # and selects the important columns without changing the format.
    res = __QA_fetch_query_filter(
        res, DATA_QUERY_INDEX_COLUMNS_UNIQUE.STOCK_MIN, query='volume>1')
    # The leftover debug ``print(res)`` was removed: it dumped the whole
    # frame to stdout on every call.
    # Output formatting.
    return QA_util_to_anyformat_from_pandas(data=res, format=format)
Ejemplo n.º 19
0
def QA_fetch_stock_xdxr(code, format='pd', collections=DATABASE.stock_xdxr):
    """Fetch ex-rights / ex-dividend records for ``code`` from the database.

    Arguments:
        code -- a code or list of codes; normalised with ``QA_util_code_tolist``

    Keyword Arguments:
        format -- accepted for interface compatibility; not used here
        collections -- collection to query (default: ``DATABASE.stock_xdxr``)

    Returns:
        The result of ``__QA_fetch_query_filter`` applied to the matching
        documents (without ``_id``) using the ``STOCK_XDXR`` settings.
    """
    code = QA_util_code_tolist(code)
    cursor = collections.find({'code': {'$in': code}}, batch_size=10000)
    data = pd.DataFrame(list(cursor)).drop(['_id'], axis=1)
    return __QA_fetch_query_filter(
        data, DATA_QUERY_INDEX_COLUMNS_UNIQUE.STOCK_XDXR, query=None)
Ejemplo n.º 20
0
def QA_fetch_stock_day(code,
                       start,
                       end,
                       format='numpy',
                       frequence='day',
                       collections=DATABASE.stock_day):
    """Query ``collections`` for records of ``code`` between ``start`` and ``end``.

    Arguments:
        code -- a code or list of codes; normalised with ``QA_util_code_tolist``
        start -- lower bound; only the first 10 characters of ``str(start)`` are used
        end -- upper bound; only the first 10 characters of ``str(end)`` are used

    Keyword Arguments:
        format -- one of ``P/p/pandas/pd``, ``json/dict``, ``n/N/numpy``,
            ``list/l/L`` (default: ``'numpy'``)
        frequence -- accepted for interface compatibility; not used here
        collections -- collection to query (default: ``DATABASE.stock_day``)

    Returns:
        The data in the requested format; ``None`` when ``end`` does not
        validate or the format is unknown.  When post-processing fails the
        frame is replaced by ``None`` before conversion.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]

    # code checking
    code = QA_util_code_tolist(code)

    # Plain truthiness, matching the sibling fetchers that call the same
    # validator.
    if not QA_util_date_valid(end):
        QA_util_log_info('something wrong with date')
        return None

    cursor = collections.find({
        'code': {
            '$in': code
        },
        "date_stamp": {
            "$lte": QA_util_date_stamp(end),
            "$gte": QA_util_date_stamp(start)
        }
    })

    res = pd.DataFrame([item for item in cursor])
    try:
        res = res.drop(
            '_id', axis=1).assign(volume=res.vol).query('volume>1').assign(
                date=pd.to_datetime(res.date)).drop_duplicates(
                    (['date', 'code'])).set_index('date', drop=False)
        # ``DataFrame.ix`` was removed in pandas 1.0, which made this branch
        # always fail and return None.  ``reindex`` keeps the old semantics
        # (a missing column becomes NaN instead of raising).
        res = res.reindex(columns=[
            'code', 'open', 'high', 'low', 'close', 'volume', 'amount',
            'date'
        ])
    except Exception:
        # Best effort: an empty or malformed result set yields None.
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    # alternative output formats
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        return None
Ejemplo n.º 21
0
def QA_fetch_index_transaction(code,
                               start,
                               end,
                               format='numpy',
                               frequence='tick',
                               collections=DATABASE.index_transaction):
    """Query ``collections`` for transaction records of ``code`` in a time window.

    Arguments:
        code -- a code or list of codes; normalised with ``QA_util_code_tolist``
        start -- window start, converted with ``QA_util_time_stamp``
        end -- window end, converted with ``QA_util_time_stamp``

    Keyword Arguments:
        format -- one of ``P/p/pandas/pd``, ``json/dict``, ``n/N/numpy``,
            ``list/l/L`` (default: ``'numpy'``)
        frequence -- ``tick``, ``TICK`` or ``transaction``; any other value
            is reported with ``print`` and then used unchanged in the query
        collections -- collection to query
            (default: ``DATABASE.index_transaction``)

    Returns:
        The data in the requested format; ``None`` for an unknown format.
        When post-processing fails the frame is replaced by ``None`` before
        conversion.
    """
    if frequence not in ('tick', 'TICK', 'transaction'):
        print(
            "QA Error QA_fetch_index_transaction parameter frequence=%s is none of tick Tick transaction"
            % frequence)
    else:
        frequence = 'tick'

    # code checking
    code = QA_util_code_tolist(code)

    time_window = {
        "$gte": QA_util_time_stamp(start),
        "$lte": QA_util_time_stamp(end)
    }
    cursor = collections.find(
        {'code': {'$in': code}, "time_stamp": time_window, 'type': frequence},
        {"_id": 0},
        batch_size=10000)

    raw = pd.DataFrame(list(cursor))
    try:
        # Expose 'vol' as 'volume', parse 'datetime', keep rows with volume > 1.
        enriched = raw.assign(
            volume=raw.vol, datetime=pd.to_datetime(raw.datetime))
        traded = enriched.query('volume>1')
        res = traded.drop_duplicates(['datetime', 'code']).set_index(
            'datetime', drop=False)
    except:
        res = None

    if format in ('P', 'p', 'pandas', 'pd'):
        return res
    if format in ('json', 'dict'):
        return QA_util_to_json_from_pandas(res)
    # alternative output formats
    if format in ('n', 'N', 'numpy'):
        return numpy.asarray(res)
    if format in ('list', 'l', 'L'):
        return numpy.asarray(res).tolist()

    print(
        "QA Error QA_fetch_index_transaction format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
        % format)
    return None
Ejemplo n.º 22
0
def QA_fetch_stock_pure_tech_indicator(code, start, end, vol='non-zero', keys='all', format='pd', collections=DATABASE.stock_tech_indicator_3):
    """Query ``collections`` for indicator records of ``code`` between two dates.

    Per the original note, rows between ``start`` and ``end`` are continuous
    and already aligned with the SH index; zero-volume dates are only
    excluded when ``vol == 'non-zero'``.

    Arguments:
        code -- a code or list of codes; normalised with ``QA_util_code_tolist``
        start -- lower bound; only the first 10 characters of ``str(start)`` are used
        end -- upper bound; only the first 10 characters of ``str(end)`` are used

    Keyword Arguments:
        vol -- ``'non-zero'`` adds a ``volume > 0`` filter to the query; any
            other value applies no volume filter
        keys -- ``'all'`` keeps every column; a string or a list selects
            that column / those columns
        format -- one of ``P/p/pandas/pd``, ``json/dict``, ``n/N/numpy``,
            ``list/l/L`` (default: ``'pd'``)
        collections -- collection to query
            (default: ``DATABASE.stock_tech_indicator_3``)

    Returns:
        The data in the requested format; ``None`` when ``end`` does not
        validate or the format is unknown.  When post-processing fails the
        frame is replaced by ``None`` before conversion.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]

    # code checking
    code = QA_util_code_tolist(code)

    if not QA_util_date_valid(end):
        QA_util_log_info(
            'QA Error QA_fetch_tech_indicator data parameter start=%s end=%s is not right' % (start, end))
        return None

    # Build the query once; the volume filter is the only optional part.
    query = {
        'code': {'$in': code},
        "date_stamp": {
            "$lte": QA_util_date_stamp(end),
            "$gte": QA_util_date_stamp(start)}}
    if vol == 'non-zero':
        query['volume'] = {"$gt": 0}
    cursor = collections.find(query)

    res = pd.DataFrame([item for item in cursor])
    try:
        # drop '_id', change 'date' from str to datetime, set 'date' to index
        res = res.drop('_id', axis=1).assign(date=pd.to_datetime(
            res.date)).drop_duplicates((['date', 'code'])).set_index('date', drop=False)
        if keys != 'all':
            # ``DataFrame.ix`` was removed in pandas 1.0, so any column
            # selection used to fail and return None.  ``reindex`` keeps the
            # old semantics (a missing column becomes NaN).
            if isinstance(keys, str):
                res = res.reindex(columns=[keys])
            elif isinstance(keys, list):
                res = res.reindex(columns=keys)
    except Exception:
        # Best effort: an empty or malformed result set yields None.
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    # alternative output formats
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        print("QA Error QA_fetch_tech_indicator format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" " % format)
        return None
Ejemplo n.º 23
0
def QA_fetch_stock_fianacial(code,
                             start,
                             end=None,
                             format='pd',
                             collections=DATABASE.stock_financial_analysis):
    """Fetch stock financial-analysis rows for the given codes and date range.

    Args:
        code: a single code or several codes; normalised with
            ``QA_util_code_tolist`` before querying.
        start: lower bound of the range, converted with ``QA_util_date_stamp``.
        end: upper bound of the range. It must pass ``QA_util_date_valid``;
            otherwise an error is logged and None is returned.
        format: output format, one of 'P'/'p'/'pandas'/'pd', 'json'/'dict',
            'n'/'N'/'numpy' or 'list'/'l'/'L'.
        collections: collection object queried through ``find``.

    Returns:
        The query result in the requested format. For the pandas formats this
        is a DataFrame, or None when post-processing of the result failed.
        None is also returned for an unknown ``format`` or an invalid ``end``.
    """
    code = QA_util_code_tolist(code)
    if QA_util_date_valid(end):
        cursor = collections.find(
            {
                'CODE': {
                    '$in': code
                },
                "date_stamp": {
                    "$lte": QA_util_date_stamp(end),
                    "$gte": QA_util_date_stamp(start)
                }
            },
            batch_size=10000)
        res = pd.DataFrame([item for item in cursor])
        try:
            # Only the 'CODE' column is lower-cased; every other column keeps
            # its stored name.
            res.columns = [
                i.lower() if i == 'CODE' else i for i in list(res.columns)
            ]
            res = res.drop(['date_stamp', '_id'], axis=1).drop_duplicates(
                (['code', 'date']))
            res['RNG_RES'] = res['AVG60_RNG'] * 60 / res['RNG_60']
        except Exception:
            # Best effort: an empty or unexpectedly shaped result yields None
            # instead of propagating the pandas error.
            res = None
        if format in ['P', 'p', 'pandas', 'pd']:
            return res
        elif format in ['json', 'dict']:
            return QA_util_to_json_from_pandas(res)
        # other output formats
        elif format in ['n', 'N', 'numpy']:
            return numpy.asarray(res)
        elif format in ['list', 'l', 'L']:
            return numpy.asarray(res).tolist()
        else:
            print(
                "QA Error QA_fetch_stock_fianacial format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
                % format)
            return None
    else:
        QA_util_log_info(
            'QA Error QA_fetch_stock_fianacial data parameter start=%s end=%s is not right'
            % (start, end))
        return None
Ejemplo n.º 24
0
def QA_fetch_financial_TTM(code,
                           start,
                           end=None,
                           format='pd',
                           collections=DATABASE.financial_TTM):
    """Fetch financial-report TTM data for the given codes and date range.

    Args:
        code: a single code or several codes; normalised with
            ``QA_util_code_tolist`` before querying.
        start: lower bound of the range, converted with ``QA_util_date_stamp``
            and compared against the stored ``date`` field.
        end: upper bound of the range. It must pass ``QA_util_date_valid``;
            otherwise an error is logged and None is returned.
        format: output format, one of 'P'/'p'/'pandas'/'pd', 'json'/'dict',
            'n'/'N'/'numpy' or 'list'/'l'/'L'.
        collections: collection object queried through ``find``.

    Returns:
        The query result in the requested format. For the pandas formats this
        is a DataFrame de-duplicated on ('REPORT_DATE', 'CODE'), or None when
        post-processing failed. None is also returned for an unknown
        ``format`` or an invalid ``end``.
    """
    code = QA_util_code_tolist(code)

    if QA_util_date_valid(end):
        cursor = collections.find(
            {
                'CODE': {
                    '$in': code
                },
                "date": {
                    "$lte": QA_util_date_stamp(end),
                    "$gte": QA_util_date_stamp(start)
                }
            },
            batch_size=10000)

        res = pd.DataFrame([item for item in cursor])
        try:
            res = res.drop('_id', axis=1).drop_duplicates(
                (['REPORT_DATE', 'CODE']))
        except Exception:
            # Best effort: an empty or unexpectedly shaped result yields None
            # instead of propagating the pandas error.
            res = None
        if format in ['P', 'p', 'pandas', 'pd']:
            return res
        elif format in ['json', 'dict']:
            return QA_util_to_json_from_pandas(res)
        # other output formats
        elif format in ['n', 'N', 'numpy']:
            return numpy.asarray(res)
        elif format in ['list', 'l', 'L']:
            return numpy.asarray(res).tolist()
        else:
            print(
                "QA Error QA_fetch_financial_TTM format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
                % format)
            return None
    else:
        QA_util_log_info(
            'QA Error QA_fetch_financial_TTM data parameter start=%s end=%s is not right'
            % (start, end))
        return None
Ejemplo n.º 25
0
def QA_fetch_stock_day(code, start, end, format='numpy', frequence='day', collections=DATABASE.stock_day):
    """Fetch daily stock bars for the given codes and date range.

    The query projects ``_id`` away on the server side, see
    https://docs.mongodb.com/manual/tutorial/project-fields-from-query-results/#return-the-specified-fields-and-the-id-field-only
    (thanks to @几何 for the hint).

    Args:
        code: a single code or several codes; normalised with
            ``QA_util_code_tolist`` before querying.
        start: lower bound of the range; only the first 10 characters of its
            string form are used.
        end: upper bound of the range, truncated the same way. It must pass
            ``QA_util_date_valid``; otherwise an error is logged and None is
            returned.
        format: output format, one of 'P'/'p'/'pandas'/'pd', 'json'/'dict',
            'n'/'N'/'numpy' or 'list'/'l'/'L'.
        frequence: accepted for interface compatibility; not used here.
        collections: collection object queried through ``find``.

    Returns:
        The bars in the requested format with the columns code, open, high,
        low, close, volume, amount and date. For the pandas formats the result
        is None when post-processing failed. None is also returned for an
        unknown ``format`` or an invalid ``end``.
    """
    start = str(start)[0:10]
    end = str(end)[0:10]

    code = QA_util_code_tolist(code)

    if QA_util_date_valid(end):

        cursor = collections.find({
            'code': {'$in': code}, "date_stamp": {
                "$lte": QA_util_date_stamp(end),
                "$gte": QA_util_date_stamp(start)}}, {"_id": 0}, batch_size=10000)

        res = pd.DataFrame([item for item in cursor])
        try:
            # Expose 'vol' as 'volume', parse dates, de-duplicate, keep only
            # rows with volume > 1 and index by date (date stays a column).
            res = res.assign(volume=res.vol, date=pd.to_datetime(
                res.date)).drop_duplicates((['date', 'code'])).query('volume>1').set_index('date', drop=False)
            # DataFrame.ix was removed from pandas; .loc is the label-based
            # equivalent for this column selection.
            res = res.loc[:, ['code', 'open', 'high', 'low',
                              'close', 'volume', 'amount', 'date']]
        except Exception:
            # Best effort: an empty or unexpectedly shaped result yields None
            # instead of propagating the pandas error.
            res = None
        if format in ['P', 'p', 'pandas', 'pd']:
            return res
        elif format in ['json', 'dict']:
            return QA_util_to_json_from_pandas(res)
        # other output formats
        elif format in ['n', 'N', 'numpy']:
            return numpy.asarray(res)
        elif format in ['list', 'l', 'L']:
            return numpy.asarray(res).tolist()
        else:
            print("QA Error QA_fetch_stock_day format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" " % format)
            return None
    else:
        QA_util_log_info(
            'QA Error QA_fetch_stock_day data parameter start=%s end=%s is not right' % (start, end))
        return None
Ejemplo n.º 26
0
def QA_fetch_stock_info(code, format='pd', collections=DATABASE.stock_info):
    """Look up the stock-info documents for one or more codes.

    Args:
        code: a single code or several codes; normalised with
            ``QA_util_code_tolist``.
        format: accepted for interface compatibility; not used here.
        collections: collection object queried through ``find``.

    Returns:
        Whatever ``__QA_fetch_query_filter`` produces for the fetched frame,
        or None (after logging the exception) when anything fails.
    """
    code = QA_util_code_tolist(code)
    try:
        selector = {'code': {'$in': code}}
        # '_id' is projected away by the query itself.
        documents = collections.find(selector, {"_id": 0}, batch_size=10000)
        frame = pd.DataFrame(list(documents))
        return __QA_fetch_query_filter(
            frame,
            DATA_QUERY_INDEX_COLUMNS_UNIQUE.STOCK_INFO,
            query=None,
        )
    except Exception as e:
        QA_util_log_info(e)
        return None
Ejemplo n.º 27
0
def QA_fetch_stock_day(code,
                       start,
                       end,
                       format='numpy',
                       frequence='day',
                       collections=DATABASE.stock_day):
    """Fetch daily stock bars for the given codes and date range.

    The query projects ``_id`` away on the server side, see
    https://docs.mongodb.com/manual/tutorial/project-fields-from-query-results/#return-the-specified-fields-and-the-id-field-only
    (thanks to @几何 for the hint).

    Args:
        code: a single code or several codes; normalised with
            ``QA_util_code_tolist``.
        start, end: range bounds. Both must pass
            ``QA_util_dateordatetime_valid``; only the first 10 characters of
            their string form are used for the query.
        format: output format handed to ``QA_util_to_anyformat_from_pandas``.
        frequence: accepted for interface compatibility; not used here.
        collections: collection object queried through ``find``.

    Returns:
        The filtered data converted by ``QA_util_to_anyformat_from_pandas``,
        or None (after logging an error) when a bound is invalid.
    """
    start_ok = QA_util_dateordatetime_valid(start)
    end_ok = QA_util_dateordatetime_valid(end)
    if not (start_ok & end_ok):
        QA_util_log_info(
            'QA Error QA_fetch_stock_day data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    # Fetch the raw documents.
    start_date = str(start)[0:10]
    end_date = str(end)[0:10]
    code = QA_util_code_tolist(code)

    selector = {
        'code': {'$in': code},
        "date_stamp": {
            "$lte": QA_util_date_stamp(end_date),
            "$gte": QA_util_date_stamp(start_date),
        },
    }
    documents = collections.find(selector, {"_id": 0}, batch_size=10000)
    frame = pd.DataFrame(list(documents))

    # Processing step: anomaly checks, index and column selection only; the
    # output format is handled afterwards.
    frame = __QA_fetch_query_filter(
        frame, DATA_QUERY_INDEX_COLUMNS_UNIQUE.STOCK_DAY, query='volume>1')

    # Convert to the requested output format.
    return QA_util_to_anyformat_from_pandas(data=frame, format=format)
Ejemplo n.º 28
0
def QA_fetch_future_min(code,
                        start,
                        end,
                        frequence='1min',
                        format='numpy',
                        collections=DATABASE.future_min):
    """Fetch minute bars from the future_min collection.

    Args:
        code: a single code or several codes; normalised with
            ``QA_util_code_tolist(..., auto_fill=False)``.
        start, end: range bounds; the first 19 characters of their string form
            are used and both must pass ``QA_util_dateordatetime_valid``.
        frequence: bar type. The short aliases '1m', '5m', '15m', '30m' and
            '60m' are mapped to '1min' ... '60min'; any other value is passed
            to the query unchanged.
        format: output format handed to ``QA_util_to_anyformat_from_pandas``.
        collections: collection object queried through ``find``.

    Returns:
        The filtered data converted by ``QA_util_to_anyformat_from_pandas``,
        or None (after logging an error) when a bound is invalid.
    """
    frequency_aliases = (
        ('1min', '1m'),
        ('5min', '5m'),
        ('15min', '15m'),
        ('30min', '30m'),
        ('60min', '60m'),
    )
    for canonical, short in frequency_aliases:
        if frequence in [canonical, short]:
            frequence = canonical
            break

    start = str(start)[0:19]
    end = str(end)[0:19]
    code = QA_util_code_tolist(code, auto_fill=False)

    start_ok = QA_util_dateordatetime_valid(start)
    end_ok = QA_util_dateordatetime_valid(end)
    if not (start_ok & end_ok):
        QA_util_log_info(
            'QA Error QA_fetch_future_min data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    selector = {
        'code': {'$in': code},
        "time_stamp": {
            "$gte": QA_util_time_stamp(start),
            "$lte": QA_util_time_stamp(end),
        },
        'type': frequence,
    }
    documents = collections.find(selector, batch_size=10000)
    frame = pd.DataFrame(list(documents))

    # Processing step: anomaly checks, index and column selection only; the
    # output format is handled afterwards.
    frame = __QA_fetch_query_filter(
        frame, DATA_QUERY_INDEX_COLUMNS_UNIQUE.FUTURE_MIN, query=None)

    # Convert to the requested output format.
    return QA_util_to_anyformat_from_pandas(data=frame, format=format)
Ejemplo n.º 29
0
    def getMin(cls, code, start, end, if_fq='00', frequence=8):
        """Fetch stock minute bars from ``cls.collectionsMin``.

        Args:
            code: a single code or several codes; normalised with
                ``QA_util_code_tolist``.
            start, end: range bounds, converted with ``QA_util_time_stamp``.
            if_fq: accepted for interface compatibility; not used here.
            frequence: passed to ``cls.getReverseFrequence``; the second
                element of its result is used as the ``type`` filter.

        Returns:
            The bars in the format selected by ``cls.format`` ('P'/'p'/
            'pandas'/'pd', 'json'/'dict', 'n'/'N'/'numpy' or 'list'/'l'/'L').
            For the pandas formats the result is None when post-processing
            failed. None is also returned for an unknown ``cls.format``.
        """
        collections = cls.collectionsMin
        _, type_, _ = cls.getReverseFrequence(frequence)

        code = QA_util_code_tolist(code)

        cursor = collections.find(
            {
                'code': {
                    '$in': code
                },
                "time_stamp": {
                    "$gte": QA_util_time_stamp(start),
                    "$lte": QA_util_time_stamp(end)
                },
                'type': type_
            }, {"_id": 0},
            batch_size=10000)

        res = pd.DataFrame([item for item in cursor])
        try:
            # Expose 'vol' as 'volume', parse datetimes, keep only rows with
            # volume > 1, de-duplicate and index by datetime (kept as column).
            res = res.assign(
                volume=res.vol, datetime=pd.to_datetime(
                    res.datetime)).query('volume>1').drop_duplicates(
                        ['datetime', 'code']).set_index('datetime', drop=False)
        except Exception:
            # Best effort: an empty or unexpectedly shaped result yields None
            # instead of propagating the pandas error.
            res = None
        if cls.format in ['P', 'p', 'pandas', 'pd']:
            return res
        elif cls.format in ['json', 'dict']:
            return QA_util_to_json_from_pandas(res)
        # other output formats
        elif cls.format in ['n', 'N', 'numpy']:
            return numpy.asarray(res)
        elif cls.format in ['list', 'l', 'L']:
            return numpy.asarray(res).tolist()
        else:
            print(
                "QA Error QA_fetch_stock_min format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
                % cls.format)
            return None
Ejemplo n.º 30
0
def QA_fetch_future_min(
        code,
        start, end,
        format='numpy',
        frequence='1min',
        collections=DATABASE.future_min):
    """Fetch minute bars from the future_min collection.

    Args:
        code: a single code or several codes; normalised with
            ``QA_util_code_tolist(..., auto_fill=False)``.
        start, end: range bounds, converted with ``QA_util_time_stamp``.
        format: 'dict'/'json' returns the raw documents as a list;
            'numpy'/'np'/'n' an array; 'list'/'l'/'L' nested lists;
            'P'/'p'/'pandas'/'pd' a DataFrame indexed by datetime.
        frequence: bar type. The short aliases '1m', '5m', '15m', '30m' and
            '60m' are mapped to '1min' ... '60min'; any other value is passed
            to the query unchanged.
        collections: collection object queried through ``find``.

    Returns:
        The bars in the requested format.
    """
    frequency_aliases = (
        ('1min', '1m'),
        ('5min', '5m'),
        ('15min', '15m'),
        ('30min', '30m'),
        ('60min', '60m'),
    )
    for canonical, short in frequency_aliases:
        if frequence in [canonical, short]:
            frequence = canonical
            break

    code = QA_util_code_tolist(code, auto_fill=False)
    selector = {
        'code': {'$in': code},
        "time_stamp": {
            "$gte": QA_util_time_stamp(start),
            "$lte": QA_util_time_stamp(end),
        },
        'type': frequence,
    }
    cursor = collections.find(selector, batch_size=10000)

    # Raw documents are handed back untouched for the dict/json formats.
    if format in ['dict', 'json']:
        return list(cursor)

    column_names = [
        'code', 'open', 'high', 'low', 'close', 'position', 'price', 'trade',
        'datetime', 'tradetime', 'time_stamp', 'date', 'type',
    ]
    rows = [
        [
            str(doc['code']),
            float(doc['open']),
            float(doc['high']),
            float(doc['low']),
            float(doc['close']),
            float(doc.get('position', 0)),
            float(doc.get('price', 0)),
            # 'trade' falls back to 'volume', then to 0.
            float(doc.get('trade', doc.get('volume', 0))),
            doc['datetime'],
            doc['tradetime'],
            doc['time_stamp'],
            doc['date'],
            doc['type'],
        ]
        for doc in cursor
    ]

    frame = DataFrame(rows, columns=column_names)
    frame = frame.assign(datetime=pd.to_datetime(frame['datetime']))
    frame = frame.drop_duplicates((['datetime', 'code']))
    frame = frame.set_index('datetime', drop=False)

    if format in ['numpy', 'np', 'n']:
        return numpy.asarray(frame)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(frame).tolist()
    elif format in ['P', 'p', 'pandas', 'pd']:
        return frame