Example #1
def fill_au_factor_pre_close(begin_date, end_date):
    """
    为daily数据集填充:
    1. 复权因子au_factor,复权的因子计算方式:au_factor = hfq_close/close
    2. pre_close = close(-1) * au_factor(-1)/au_factor
    :param begin_date: 开始日期
    :param end_date: 结束日期
    """
    all_codes = get_all_codes()

    for code in all_codes:
        hfq_daily_cursor = DB_CONN['daily_hfq'].find(
            {'code': code, 'date': {'$lte': end_date, '$gte': begin_date}, 'index': False},
            sort=[('date', ASCENDING)],
            projection={'date': True, 'close': True})

        date_hfq_close_dict = dict([(x['date'], x['close']) for x in hfq_daily_cursor])

        daily_cursor = DB_CONN['daily'].find(
            {'code': code, 'date': {'$lte': end_date, '$gte': begin_date}, 'index': False},
            sort=[('date', ASCENDING)],
            projection={'date': True, 'close': True}
        )

        last_close = -1
        last_au_factor = -1

        update_requests = []
        for daily in daily_cursor:
            date = daily['date']
            try:
                close = daily['close']

                doc = dict()

                # adjustment factor = backward-adjusted (hfq) close of the day / actual close of the day
                au_factor = round(date_hfq_close_dict[date] / close, 2)
                doc['au_factor'] = au_factor
                # today's pre_close = previous actual close * previous au_factor / today's au_factor
                if last_close != -1 and last_au_factor != -1:
                    pre_close = last_close * last_au_factor / au_factor
                    doc['pre_close'] = round(pre_close, 2)

                last_au_factor = au_factor
                last_close = close

                update_requests.append(
                    UpdateOne(
                        {'code': code, 'date': date, 'index': False},
                        {'$set': doc}))
            except:
                print('Error while computing the adjustment factor, code: %s, date: %s' % (code, date), flush=True)
                # reset to the initial values to avoid using stale data
                last_close = -1
                last_au_factor = -1

        if len(update_requests) > 0:
            update_result = DB_CONN['daily'].bulk_write(update_requests, ordered=False)
            print('Filled au_factor and pre_close, code: %s, updated: %4d' %
                  (code, update_result.modified_count), flush=True)
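A quick numeric check of the two formulas in the docstring above (au_factor = hfq_close / close and pre_close = close(-1) * au_factor(-1) / au_factor), using made-up prices rather than real market data:

# Hypothetical two-day example (made-up numbers) to sanity-check the formulas above.
day1_close, day1_hfq_close = 10.00, 20.00   # actual close and backward-adjusted close
day2_close, day2_hfq_close = 5.20, 20.80    # e.g. the day after a 2-for-1 split

day1_factor = round(day1_hfq_close / day1_close, 2)   # 2.0
day2_factor = round(day2_hfq_close / day2_close, 2)   # 4.0

# pre_close = previous close * previous au_factor / current au_factor
day2_pre_close = round(day1_close * day1_factor / day2_factor, 2)   # 5.0
print(day1_factor, day2_factor, day2_pre_close)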
Example #2
def compute_pe():
    """
    计算股票在某只的市盈率
    """

    # 获取所有股票
    codes = get_all_codes()

    for code in codes:
        print('Computing PE, %s' % code)
        daily_cursor = daily_collection.find({
            'code': code,
            'index': False
        },
                                             projection={
                                                 'close': True,
                                                 'date': True
                                             })

        update_requests = []
        for daily in daily_cursor:
            _date = daily['date']
            # Find this stock's annual report closest to the current date, queried by announcement date to avoid look-ahead bias
            finance_report = finance_report_collection.find_one(
                {
                    'code': code,
                    'report_date': {
                        '$regex': r'\d{4}-12-31'
                    },
                    'announced_date': {
                        '$lte': _date
                    }
                },
                sort=[('announced_date', DESCENDING)])

            if finance_report is None:
                continue

            # Compute the rolling PE and save it into daily_k
            eps = 0
            if finance_report['eps'] != '-':
                eps = finance_report['eps']

            # Compute PE
            if eps != 0:
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': _date,
                        'index': False
                    }, {'$set': {
                        'pe': round(daily['close'] / eps, 4)
                    }}))

        if len(update_requests) > 0:
            update_result = daily_collection.bulk_write(update_requests,
                                                        ordered=False)
            print('Updated PE, %s, updated: %d' % (code, update_result.modified_count))
Example #3
def crawl_finance_report():
    # First get the list of all stock codes
    codes = get_all_codes()

    # Create a connection pool
    conn_pool = urllib3.PoolManager()

    # URL of the financial data to crawl; scode is the stock code
    url = 'http://dcfm.eastmoney.com//em_mutisvcexpandinterface/api/js/get?' \
          'type=YJBB20_YJBB&token=70f12f2f4f091e459a279469fe49eca5&st=reportdate&sr=-1' \
          '&filter=(scode={0})&p={page}&ps={pageSize}&js={"pages":(tp),"data":%20(x)}'

    # Loop over all stocks and crawl their financial data
    for code in codes:
        # Substitute the stock code and crawl this stock's financial data
        response = conn_pool.request('GET', url.replace('{0}', code))

        # Parse the crawled result
        result = json.loads(response.data.decode('UTF-8'))

        # Extract the data
        reports = result['data']

        # List of database update requests
        update_requests = []
        # Process every report record
        for report in reports:
            doc = {
                # report period
                'report_date': report['reportdate'][0:10],
                # announcement date
                'announced_date': report['latestnoticedate'][0:10],
                # earnings per share
                'eps': report['basiceps'],
                'code': code
            }

            # Add the update request to the list; the query condition is code + report_date.
            # To save data quickly, create an index: db.finance_report.createIndex({'code':1, 'report_date':1})
            update_requests.append(
                UpdateOne(
                    {
                        'code': code,
                        'report_date': doc['report_date']
                    },
                    # upsert=True inserts a new document when no match is found
                    {'$set': doc},
                    upsert=True))

        # If the update request list is not empty, write it to the database
        if len(update_requests) > 0:
            # Use bulk writes to speed up saving
            update_result = DB_CONN['finance_report'].bulk_write(
                update_requests, ordered=False)
            print('Stock %s, financial reports, updated %d, inserted %d' %
                  (code, update_result.modified_count,
                   update_result.upserted_count))
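The comment above mentions creating the code + report_date index from the mongo shell. The same index can also be ensured from Python before the bulk write; a minimal sketch, assuming the same DB_CONN handle used in the function:

# pymongo equivalent of db.finance_report.createIndex({'code': 1, 'report_date': 1})
DB_CONN['finance_report'].create_index(
    [('code', 1), ('report_date', 1)], background=True)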
Example #4
def compute_fractal(begin_date, end_date):
    codes = get_all_codes()

    for code in codes:
        try:
            # Get the backward-adjusted (hfq) prices; use them to compute the fractal signals
            daily_cursor = DB_CONN['daily_hfq'].find(
                {'code': code, 'date': {'$gte': begin_date, '$lte': end_date}, 'index': False},
                sort=[('date', ASCENDING)],
                projection={'date': True, 'high': True, 'low': True, '_id': False}
            )

            df_daily = DataFrame([daily for daily in daily_cursor])

            df_daily.set_index('date', inplace=True)

            df_daily_left_shift_1 = df_daily.shift(-1)
            df_daily_left_shift_2 = df_daily.shift(-2)
            df_daily_right_shift_1 = df_daily.shift(1)
            df_daily_right_shift_2 = df_daily.shift(2)

            df_daily['up'] = (df_daily['high'] > df_daily_left_shift_1['high']) & \
                             (df_daily['high'] > df_daily_left_shift_2['high']) & \
                             (df_daily['high'] > df_daily_right_shift_1['high']) & \
                             (df_daily['high'] > df_daily_right_shift_2['high'])

            df_daily['down'] = (df_daily['low'] < df_daily_left_shift_1['low']) & \
                               (df_daily['low'] < df_daily_left_shift_2['low']) & \
                               (df_daily['low'] < df_daily_right_shift_1['low']) & \
                               (df_daily['low'] < df_daily_right_shift_2['low'])

            df_daily = df_daily[(df_daily['up'] | df_daily['down'])]

            # Drop the unused columns before saving the result to the database
            df_daily.drop(['high', 'low'], axis=1, inplace=True)

            print(df_daily)
            # Save the signals to the database
            update_requests = []
            for index in df_daily.index:
                doc = {
                    'code': code,
                    'date': index,
                    # direction: 'up' for an upward signal, 'down' for a downward signal
                    'direction': 'up' if df_daily.loc[index]['up'] else 'down'
                }
                update_requests.append(
                    UpdateOne(doc, {'$set': doc}, upsert=True))

            if len(update_requests) > 0:
                update_result = DB_CONN['fractal_signal'].bulk_write(update_requests, ordered=False)
                print('%s, upserted: %4d, modified: %4d' %
                      (code, update_result.upserted_count, update_result.modified_count),
                      flush=True)
        except:
            print('Error occurred: %s' % code, flush=True)
            traceback.print_exc()
Example #5
def crawl_finance_report():
    # First get the list of all stock codes
    codes = get_all_codes()

    # Create a connection pool
    conn_pool = urllib3.PoolManager()

    url = 'http://dcfm.eastmoney.com//em_mutisvcexpandinterface/api/js/get?' \
          'type=YJBB20_YJBB&token=70f12f2f4f091e459a279469fe49eca5&st=reportdate&sr=-1' \
          '&filter=(scode={0})&p={page}&ps={pageSize}&js={"pages":(tp),"data":%20(x)}'

    cookie = 'emstat_bc_emcount=21446959091031597218; pgv_pvi=8471522926; st_pvi=95785429701209; _' \
             'ga=GA1.2.700565749.1496634081; Hm_lvt_557fb74c38569c2da66471446bbaea3f=1499912514; _' \
             'qddaz=QD.g2d11t.ydltyz.j61eq2em; ct=YTJNd7eYzkV_0WPJBmEs-FB0AGfyz7Z9G-Z1' \
             'HbsPTxwV9TxpuvcB2fM1xoG5PhqgTI5KlrQZKFZReg3g3ltIwo8fMyzHhEzVjltYwjAigMTdZvdEHnU7QW2' \
             'O-7u0dCkmtsFOBI4vbW1ELaZ9iUS9qPFAtIkL9M8GJTj8liRUgJY; ut=FobyicMgeV4t8TZ4Md7eLYClhCqi0w' \
             'XPSu3ZyZ4h4Q8vWCyLMuChP80vhfidM2802fUv5AJEgl9ddudfTRqObGqQ47QN4oJS5hoWxdsHCY6lvJEeXDTNKWsdP' \
             'hsfzg0i-ukMlT11XfPMIsBG9DzhW3xDAR3flNcqE5csB2rT3cfVPchlihFWHk-f3F1-lSsBjduc9_Ws_jjJEsi46' \
             'xEai2mCVGd_O41yhPU3MWXl2_2QJU_ILgnzruwDvjeoQRtf8COKmiJCtE6hhy04RvSjmbzBVeZXqUhd; pi=42660' \
             '45025913572%3bb4266045025913572%3b%e8%82%a1%e5%8f%8bZTLUIt%3bo97rhoY6b5AbF5jETm3t72EC9RGp' \
             'IhrLsDj7myRgKyWSJmYrdl1WGaA9dMGpydaY4AptuI0ZgKDj6PCir1z%2bY1if6G0iITYI4Rv%2bPXy6H%2f4u7Rg' \
             'iD%2f2hCYAGnfitkw9HQXnqBETzflfUGnvGJysWiVyPlOp%2fZh4Hfe6NqssBxCqJUrGOCM06F7feAXC6Vapy%2fse' \
             '0PT2a%3bVMsSChhqtxvtvecfLmv9FInLBANRLHpns2d%2bJGh272rIXhkWm%2bNK%2bXxkRKL2a0EgScqdtlcYN1QC' \
             'hVUWT7gmrH9py08FBPk2n5EQA9m9Zt5o2m%2bMuQhON2f66vlq%2bGk3Z66s%2brgCQhSPqoUPxluzSwBk7I9NNA%3d' \
             '%3d; uidal=4266045025913572%e8%82%a1%e5%8f%8bZTLUIt; vtpst=|; em_hq_fls=old; emstat_ss_emco' \
             'unt=5_1505917025_902015979; st_si=83202211429810; em-quote-version=topspeed; showpr3guide=1; ' \
             'qgqp_b_id=367cbd71ad5c205f172815cdab571db9; hvlist=a-000858-2~a-000651-2~a-600000-1~a-300017-2' \
             '~a-600020-1~a-600005-1~a-600004-1~a-162605-2~a-159901-2~a-600015-1~a-002364-2~a-600128-1~a-0023' \
             '57-2~a-002363-2~a-601106-1; HAList=a-sz-300059-%u4E1C%u65B9%u8D22%u5BCC%2Ca-sz-002607-%u4E9A%u590' \
             'F%u6C7D%u8F66%2Ca-sh-603259-%u836F%u660E%u5EB7%u5FB7%2Ca-sz-000858-%u4E94%u7CAE%u6DB2%2Ca-sh-600165' \
             '-%u65B0%u65E5%u6052%u529B%2Ca-sh-603013-%u4E9A%u666E%u80A1%u4EFD%2Ca-sz-002841-%u89C6%u6E90%u80A1%u4' \
             'EFD%2Cf-0-399300-%u6CAA%u6DF1300%2Cf-0-000300-%u6CAA%u6DF1300%2Ca-sz-000651-%u683C%u529B%u7535%u5668%' \
             '2Ca-sz-000735-%u7F57%u725B%u5C71'
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) ' \
                 'Chrome/66.0.3359.139 Safari/537.36'

    for code in codes:
        response = conn_pool.request('GET', url.replace('{0}', code),
                                     headers={
                                         'Cookie': cookie,
                                         'User-Agent': user_agent})

        # Parse the crawled result
        result = json.loads(response.data.decode('UTF-8'))
        print(result)
Example #6
def crawl_basic():
    """
    抓取所有股票的股票基础信息
    """

    # Get the list of all stock codes
    all_codes = get_all_codes()

    # Crawl the basic information for each stock code

    lg = bs.login()
    for code in all_codes:
        try:
            # Crawl the basic information for this code
            crawl_basic_at_code(code)
        except:
            print('Error while crawling basic stock info, code: %s' % code, flush=True)
    bs.logout()
Example #7
def threads_cal_pe(codes=None):
    '''
        Compute stock PE with multiple threads.

        Usage:
        add the following code to main:
        codes, threads = threads_cal_pe()
        for i in range(len(codes)):
            threads[i].start()
        for i in range(len(codes)):
            threads[i].join()
        '''
    # codes = get_all_codes()
    if codes is None:
        codes = get_all_codes()
    threads = []
    # dates = get_trading_dates()
    for code in codes:
        t = threading.Thread(target=pe_computing, args=(code, ))
        threads.append(t)
    return codes, threads
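A minimal driver following the usage described in the docstring; it assumes pe_computing and get_all_codes are importable from the surrounding project:

if __name__ == '__main__':
    codes, threads = threads_cal_pe()
    for t in threads:
        t.start()
    for t in threads:
        t.join()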
Example #8
def macd_compute(begin_date=None, end_date=None, codes=None):

    if codes is None:
        codes = get_all_codes()
    if begin_date is None:
        begin_date = '2008-01-01'
    if end_date is None:
        end_date = datetime.now().strftime('%Y-%m-%d')
    if isinstance(codes, list) is False:
        codes = [codes]

    for code in codes:
        try:
            daily_hfq_cursor = daily_hfq_collection.find(
                {
                    'code': code,
                    'date': {
                        '$lte': end_date,
                        '$gte': begin_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': False
                },
            ).hint([('code', 1), ('date', -1)])

            df_daily = DataFrame(daily for daily in daily_hfq_cursor)
            # for i in range(len(df_daily['date'])):
            #     df_daily['date'][i] = datetime.strptime(df_daily['date'][i], '%Y-%m-%d')
            df_daily.set_index(['date'], inplace=True)

            EMA1 = []
            EMA2 = []
            N1 = 12
            N2 = 26
            index = 0

            for date in df_daily.index:
                if index == 0:
                    EMA1.append(df_daily.loc[date]['close'])
                    EMA2.append(df_daily.loc[date]['close'])
                else:
                    EMA1.append(2 / (N1 + 1) *
                                (df_daily.loc[date]['close'] - EMA1[-1]) +
                                EMA1[-1])
                    EMA2.append(2 / (N2 + 1) *
                                (df_daily.loc[date]['close'] - EMA2[-1]) +
                                EMA2[-1])
                index += 1

            df_daily['EMA1'] = EMA1
            df_daily['EMA2'] = EMA2

            df_daily['DIFF'] = df_daily['EMA1'] - df_daily['EMA2']

            index = 0
            DEA = []
            M = 9
            for date in df_daily.index:
                if index == 0:
                    DEA.append(df_daily.loc[date]['DIFF'])
                else:
                    DEA.append(2 / (M + 1) *
                               (df_daily.loc[date]['DIFF'] - DEA[-1]) +
                               DEA[-1])
                index += 1
            df_daily['DEA'] = DEA

            df_daily['delta'] = df_daily['DIFF'] - df_daily['DEA']
            df_daily['pre_delta'] = df_daily['delta'].shift(1)

            df_daily_gold = df_daily[(df_daily['delta'] > 0)
                                     & (df_daily['pre_delta'] <= 0)]
            df_daily_dead = df_daily[(df_daily['delta'] < 0)
                                     & (df_daily['pre_delta'] >= 0)]

            # print(df_daily_gold)
            # df_daily.plot(kind='line', title='macd', y=['DIFF', 'DEA'])
            # plt.show()

            update_requests = []
            for date in df_daily_gold.index:
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {
                        '$set': {
                            'code': code,
                            'date': date,
                            'signal': 'gold'
                        }
                    },
                              upsert=True))

            for date in df_daily_dead.index:
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {
                        '$set': {
                            'code': code,
                            'date': date,
                            'signal': 'dead'
                        }
                    },
                              upsert=True))

            if len(update_requests) > 0:
                DB_CONN['macd'].create_index([('code', 1), ('date', -1)],
                                             background=True)
                requests_result = DB_CONN['macd'].bulk_write(update_requests,
                                                             ordered=False)
                print(
                    'Save MACD data, code: %s, insert: %4d, update: %4d'
                    % (code, requests_result.upserted_count,
                       requests_result.modified_count),
                    flush=True)
        except:
            print('ERROR! code: %s' % code)
            traceback.print_exc()
Example #9
def compute(begin_date, end_date):
    """
    计算指定日期内的Boll突破上轨和突破下轨信号,并保存到数据库中,
    方便查询使用
    :param begin_date: 开始日期
    :param end_date: 结束日期
    """

    # Get all stock codes
    all_codes = get_all_codes()

    # Compute the Bollinger signal for each stock
    for code in all_codes:
        try:
            # Get the backward-adjusted (hfq) prices; use them to compute BOLL
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': begin_date,
                        '$lte': end_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': False
                })

            df_daily = DataFrame([daily for daily in daily_cursor])

            # Compute MB (middle band); computed after the close, so the current day's close is used
            df_daily['MB'] = df_daily['close'].rolling(20).mean()
            # Compute STD20, the 20-day standard deviation
            df_daily['std'] = df_daily['close'].rolling(20).std()

            print(df_daily, flush=True)
            # Compute UP, the upper band
            df_daily['UP'] = df_daily['MB'] + 2 * df_daily['std']
            # Compute DOWN, the lower band
            df_daily['DOWN'] = df_daily['MB'] - 2 * df_daily['std']

            print(df_daily, flush=True)

            # Use the date as the index
            df_daily.set_index(['date'], inplace=True)

            # Shift close by one position so it becomes the previous close at the current index
            last_close = df_daily['close'].shift(1)

            # Shift the upper band by one day so the previous day's band and close are aligned with the current day
            shifted_up = df_daily['UP'].shift(1)
            # Upper-band breakout (upward): previous close at or below the previous upper band, current close above it
            df_daily['up_mask'] = (last_close <= shifted_up) & (
                df_daily['close'] > shifted_up)

            # Shift the lower band by one day so the previous day's band and close are aligned with the current day
            shifted_down = df_daily['DOWN'].shift(1)
            # Lower-band breakout (downward): previous close at or above the previous lower band, current close below it
            df_daily['down_mask'] = (last_close >= shifted_down) & (
                df_daily['close'] < shifted_down)

            # Filter the result, keeping only rows with an upward or downward breakout
            df_daily = df_daily[df_daily['up_mask'] | df_daily['down_mask']]
            # Drop the columns that are no longer needed from the DataFrame
            df_daily.drop(['close', 'std', 'MB', 'UP', 'DOWN'],
                          axis=1,
                          inplace=True)

            # Save the signals to the database
            update_requests = []
            # the DataFrame index is the date
            for date in df_daily.index:
                # The saved document contains code, date and signal direction; combined with the collection name it identifies the stock and day
                doc = {
                    'code': code,
                    'date': date,
                    # direction: upward breakout 'up', downward breakout 'down'
                    'direction':
                    'up' if df_daily.loc[date]['up_mask'] else 'down'
                }
                update_requests.append(
                    UpdateOne(doc, {'$set': doc}, upsert=True))

            # If there are signals, save them to the database
            if len(update_requests) > 0:
                # Bulk-write into the boll collection
                update_result = DB_CONN['boll'].bulk_write(update_requests,
                                                           ordered=False)
                print('%s, upserted: %4d, modified: %4d' %
                      (code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except:
            traceback.print_exc()
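A tiny hand-check of the up_mask condition above (previous close at or below the previous day's upper band, current close above it), using made-up numbers:

from pandas import DataFrame

# Made-up close prices and upper-band values for three consecutive days.
toy = DataFrame({'close': [10.0, 10.5, 11.2], 'UP': [10.8, 10.9, 11.0]})
last_close = toy['close'].shift(1)
shifted_up = toy['UP'].shift(1)
up_mask = (last_close <= shifted_up) & (toy['close'] > shifted_up)
print(up_mask.tolist())   # [False, False, True]: only day 3 closes above day 2's upper band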
Example #10
def compute_rsi(begin_date, end_date):
    codes = get_all_codes()

    # RSI lookback period
    N = 12
    for code in codes:
        try:
            # Get the backward-adjusted (hfq) prices; use them to compute RSI
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': begin_date,
                        '$lte': end_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': False
                })

            df_daily = DataFrame([daily for daily in daily_cursor])

            df_daily.set_index('date', inplace=True)
            df_daily['pre_close'] = df_daily['close'].shift(1)
            df_daily['change_pct'] = (df_daily['close'] - df_daily['pre_close']
                                      ) * 100 / df_daily['pre_close']
            # Keep only the up moves (negative changes become 0)
            df_daily['up_pct'] = DataFrame({
                'up_pct': df_daily['change_pct'],
                'zero': 0
            }).max(1)

            # Compute RSI
            df_daily['RSI'] = df_daily['up_pct'].rolling(N).mean() / abs(
                df_daily['change_pct']).rolling(N).mean() * 100

            df_daily.plot(kind='line', title='RSI', y=['RSI'])
            plt.show()
            # Shift
            # df_daily['PREV_RSI'] = df_daily['RSI'].shift(1)
            #
            #
            #
            # # Overbought: RSI crosses below 80
            # df_daily_gold = df_daily[(df_daily['RSI'] < 80) & (df_daily['PREV_RSI'] >= 80)]
            # # Oversold: RSI crosses above 20
            # df_daily_dead = df_daily[(df_daily['RSI'] > 20) & (df_daily['PREV_RSI'] <= 20)]
            #
            # # Save the results to the database
            # update_requests = []
            # for date in df_daily_gold.index:
            #     update_requests.append(UpdateOne(
            #         {'code': code, 'date': date},
            #         {'$set': {'code':code, 'date': date, 'signal': 'gold'}},
            #         upsert=True))
            #
            # for date in df_daily_dead.index:
            #     update_requests.append(UpdateOne(
            #         {'code': code, 'date': date},
            #         {'$set': {'code':code, 'date': date, 'signal': 'dead'}},
            #         upsert=True))
            #
            # if len(update_requests) > 0:
            #     update_result = DB_CONN['rsi'].bulk_write(update_requests, ordered=False)
            #     print('Save RSI, code: %s, inserted: %4d, updated: %4d' %
            #           (code, update_result.upserted_count, update_result.modified_count), flush=True)
        except:
            print('Error occurred: %s' % code, flush=True)
            traceback.print_exc()
Example #11
def compute_macd(begin_date, end_date):
    codes = get_all_codes()
    # short period
    short = 12
    # long period
    long = 26

    for code in codes:
        try:
            # Get the backward-adjusted (hfq) prices; use them to compute MACD
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': begin_date,
                        '$lte': end_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': False
                })

            df_daily = DataFrame([daily for daily in daily_cursor])

            df_daily.set_index('date', inplace=True)

            # Compute the EMAs
            index = 0
            EMA1 = []
            EMA2 = []
            for date in df_daily.index:
                if index == 0:
                    # Initialize the short and long EMAs
                    EMA1.append(df_daily.loc[date]['close'])
                    EMA2.append(df_daily.loc[date]['close'])
                else:
                    EMA1.append(
                        2 / (short + 1) *
                        (df_daily.loc[date]['close'] - EMA1[index - 1]) +
                        EMA1[index - 1])
                    EMA2.append(
                        2 / (long + 1) *
                        (df_daily.loc[date]['close'] - EMA2[index - 1]) +
                        EMA2[index - 1])

                index += 1

            df_daily['EMA1'] = EMA1
            df_daily['EMA2'] = EMA2

            # Compute DIFF: short EMA - long EMA
            df_daily['DIFF'] = df_daily['EMA1'] - df_daily['EMA2']

            # Compute DEA: EMA(DIFF, M)
            m = 9
            index = 0
            DEA = []
            for date in df_daily.index:
                if index == 0:
                    DEA.append(df_daily.loc[date]['DIFF'])
                else:
                    # M = 9 DEA = EMA(DIFF, 9)
                    DEA.append(2 / (m + 1) *
                               (df_daily.loc[date]['DIFF'] - DEA[index - 1]) +
                               DEA[index - 1])
                index += 1

            df_daily['DEA'] = DEA

            df_daily['delta'] = df_daily['DIFF'] - df_daily['DEA']
            df_daily['pre_delta'] = df_daily['delta'].shift(1)
            # Golden cross: DIFF crosses above DEA
            df_daily_gold = df_daily[(df_daily['pre_delta'] <= 0)
                                     & (df_daily['delta'] > 0)]
            # Dead cross: DIFF crosses below DEA
            df_daily_dead = df_daily[(df_daily['pre_delta'] >= 0)
                                     & (df_daily['delta'] < 0)]

            # Save the results to the database
            update_requests = []
            for date in df_daily_gold.index:
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {
                        '$set': {
                            'code': code,
                            'date': date,
                            'signal': 'gold'
                        }
                    },
                              upsert=True))

            for date in df_daily_dead.index:
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {
                        '$set': {
                            'code': code,
                            'date': date,
                            'signal': 'dead'
                        }
                    },
                              upsert=True))

            if len(update_requests) > 0:
                update_result = DB_CONN['macd'].bulk_write(update_requests,
                                                           ordered=False)
                print('Save MACD, code: %s, inserted: %4d, updated: %4d' %
                      (code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except:
            print('Error occurred: %s' % code, flush=True)
            traceback.print_exc()
Example #12
def compute(begin_date, end_date):
    """
    计算指定日期内的信号
    :param begin_date: 开始日期
    :param end_date: 结束日期
    """
    all_codes = get_all_codes()

    # Debug override: restrict the run to a single stock code
    all_codes = ['000651']

    for code in all_codes:
        try:
            # Get the backward-adjusted (hfq) prices; use them to compute BOLL
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': begin_date,
                        '$lte': end_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': False
                })

            df_daily = DataFrame([daily for daily in daily_cursor])

            # Compute MB (middle band); computed after the close, so the current day's close is used
            df_daily['MB'] = df_daily['close'].rolling(20).mean()
            # Compute STD20
            df_daily['std'] = df_daily['close'].rolling(20).std()

            print(df_daily, flush=True)
            # Compute UP (upper band)
            df_daily['UP'] = df_daily['MB'] + 2 * df_daily['std']
            # Compute DOWN (lower band)
            df_daily['DOWN'] = df_daily['MB'] - 2 * df_daily['std']

            print(df_daily, flush=True)

            # # Use the date as the index
            # df_daily.set_index(['date'], inplace=True)
            #
            # # Shift close by one position so it becomes the previous close at the current index
            # last_close = df_daily['close'].shift(1)
            #
            # # Upper-band breakout
            # shifted_up = df_daily['UP'].shift(1)
            # df_daily['up_mask'] = (last_close <= shifted_up) & (df_daily['close'] > shifted_up)
            #
            # # Lower-band breakout
            # shifted_down = df_daily['DOWN'].shift(1)
            # df_daily['down_mask'] = (last_close >= shifted_down) & (df_daily['close'] < shifted_down)
            #
            # # Filter the result
            # df_daily = df_daily[df_daily['up_mask'] | df_daily['down_mask']]
            # df_daily.drop(['close', 'std', 'MB', 'UP', 'DOWN'], 1, inplace=True)
            #
            # # Save the signals to the database
            # update_requests = []
            # for index in df_daily.index:
            #     doc = {
            #         'code': code,
            #         'date': index,
            #         # direction: upward breakout 'up', downward breakout 'down'
            #         'direction': 'up' if df_daily.loc[index]['up_mask'] else 'down'
            #     }
            #     update_requests.append(
            #         UpdateOne(doc, {'$set': doc}, upsert=True))
            #
            # if len(update_requests) > 0:
            #     update_result = DB_CONN['boll'].bulk_write(update_requests, ordered=False)
            #     print('%s, upserted: %4d, modified: %4d' %
            #           (code, update_result.upserted_count, update_result.modified_count),
            #           flush=True)
        except:
            traceback.print_exc()
Example #13
def compute_ris(begin_date=None, end_date=None, codes=None):
    '''
    Compute the RSI values and save them to MongoDB
    '''

    if codes is None:
        codes = get_all_codes()
    if begin_date is None:
        begin_date = '2008-01-01'
    if end_date is None:
        end_date = datetime.now().strftime('%Y-%m-%d')
    if isinstance(codes, list) is False:
        codes = [codes]

    N = 12
    for code in codes:

        try:
            # Get the backward-adjusted (hfq) prices; use them to compute RSI
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': begin_date,
                        '$lte': end_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': False
                }).hint([('code', 1), ('date', -1)])

            df_daily = DataFrame([daily for daily in daily_cursor])

            if df_daily.index.size < N:
                print('data is not enough: %s' % code, flush=True)
                continue

            df_daily.set_index('date', inplace=True)
            df_daily['pre_close'] = df_daily['close'].shift(1)
            df_daily['change_pct'] = (df_daily['close'] - df_daily['pre_close']
                                      ) * 100 / df_daily['pre_close']
            df_daily['up_pct'] = DataFrame({
                'up_pct': df_daily['change_pct'],
                'zero': 0
            }).max(1)
            df_daily['RSI'] = df_daily['up_pct'].rolling(N).mean() / abs(
                df_daily['change_pct']).rolling(N).mean() * 100
            df_daily['PREV_RSI'] = df_daily['RSI'].shift(1)
            df_daily.drop(['pre_close', 'change_pct', 'up_pct', 'close'],
                          axis=1,
                          inplace=True)

            # df_daily['up'] = 80
            # df_daily['down'] = 20
            # df_daily.plot(kind='line', title='RSI', y=['RSI', 'up', 'down'])
            # plt.show()

            # Save the data to MongoDB
            update_requests = []
            for date in df_daily.index:
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {
                        '$set': {
                            'code': code,
                            'date': date,
                            'RSI': df_daily.loc[date]['RSI']
                        }
                    },
                              upsert=True))
            if len(update_requests) > 0:
                DB_CONN['RSI'].create_index([("code", 1), ("date", -1)],
                                            background=True)
                update_result = DB_CONN['RSI'].bulk_write(update_requests,
                                                          ordered=True)
                print('Save RSI data, code: %s, insert: %4d, update: %4d' %
                      (code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except:
            print('ERROR happened, code: %s' % code, flush=True)
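The ratio used above is the simple-moving-average form of RSI: the N-day mean of up moves divided by the N-day mean of absolute moves, times 100. A toy check with N = 3 and made-up percentage changes:

from pandas import Series

N = 3
change_pct = Series([1.0, -2.0, 3.0, -1.0])   # made-up daily percentage changes
up_pct = change_pct.clip(lower=0)             # keep only the up moves
rsi = up_pct.rolling(N).mean() / change_pct.abs().rolling(N).mean() * 100
print(rsi.round(2).tolist())   # [nan, nan, 66.67, 50.0]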
Example #14
def compute_macd(begin_date, end_date):
    """
    计算给定周期内的MACD金叉和死叉信号,把结果保存到数据库中
    :param begin_date: 开始日期
    :param end_date: 结束日期
    """
    """
    下面几个参数是计算MACD时的产生,这几个参数的取值都是常用值
    也可以根据需要调整
    """
    # short period
    short = 12
    # long period
    long = 26
    # M value used for the EMA of DIFF (DEA)
    m = 9

    # Get all stock codes
    codes = get_all_codes()

    # Check the MACD golden-cross and dead-cross signals for every stock
    for code in codes:
        try:
            # Get the backward-adjusted (hfq) prices; use them to compute MACD
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': begin_date,
                        '$lte': end_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': False
                })

            # Load the data into a DataFrame
            df_daily = DataFrame([daily for daily in daily_cursor])
            # Use date as the index
            df_daily.set_index('date', inplace=True)

            # Compute the EMAs
            # alpha = 2/(N+1)
            # EMA(i) = (1 - alpha) * EMA(i-1) + alpha * CLOSE(i)
            #        = alpha * (CLOSE(i) - EMA(i-1)) + EMA(i-1)
            index = 0
            # short-period EMA list
            EMA1 = []
            # long-period EMA list
            EMA2 = []
            # Compute the short and long EMA for each day
            for date in df_daily.index:
                # On the first day the EMA is simply that day's close
                if index == 0:
                    # Initialize the short and long EMAs
                    EMA1.append(df_daily.loc[date]['close'])
                    EMA2.append(df_daily.loc[date]['close'])
                else:
                    # short and long EMAs
                    EMA1.append(
                        2 / (short + 1) *
                        (df_daily.loc[date]['close'] - EMA1[index - 1]) +
                        EMA1[index - 1])
                    EMA2.append(
                        2 / (long + 1) *
                        (df_daily.loc[date]['close'] - EMA2[index - 1]) +
                        EMA2[index - 1])

                index += 1

            # Add the short and long EMAs as DataFrame columns
            df_daily['EMA1'] = EMA1
            df_daily['EMA2'] = EMA2

            # Compute DIFF: short EMA - long EMA
            df_daily['DIFF'] = df_daily['EMA1'] - df_daily['EMA2']

            # Compute DEA, the EMA of DIFF: EMA(DIFF, M)
            index = 0
            DEA = []
            for date in df_daily.index:
                if index == 0:
                    DEA.append(df_daily.loc[date]['DIFF'])
                else:
                    # M = 9 DEA = EMA(DIFF, 9)
                    DEA.append(2 / (m + 1) *
                               (df_daily.loc[date]['DIFF'] - DEA[index - 1]) +
                               DEA[index - 1])
                index += 1

            df_daily['DEA'] = DEA

            # Compute the difference between DIFF and DEA
            df_daily['delta'] = df_daily['DIFF'] - df_daily['DEA']
            # Shift delta by one day so yesterday's delta becomes today's pre_delta
            df_daily['pre_delta'] = df_daily['delta'].shift(1)
            # Golden cross: DIFF crosses above DEA (yesterday DIFF below DEA, today DIFF above DEA)
            df_daily_gold = df_daily[(df_daily['pre_delta'] <= 0)
                                     & (df_daily['delta'] > 0)]
            # Dead cross: DIFF crosses below DEA (yesterday DIFF above DEA, today DIFF below DEA)
            df_daily_dead = df_daily[(df_daily['pre_delta'] >= 0)
                                     & (df_daily['delta'] < 0)]

            # Save the results to the database
            update_requests = []
            for date in df_daily_gold.index:
                # Save with code and date as the query condition (update or insert), so an index on code and date is needed
                # The signal field marks the type: 'gold' for a golden cross
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {
                        '$set': {
                            'code': code,
                            'date': date,
                            'signal': 'gold'
                        }
                    },
                              upsert=True))

            for date in df_daily_dead.index:
                # Save with code and date as the query condition (update or insert), so an index on code and date is needed
                # The signal field marks the type: 'dead' for a dead cross
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {
                        '$set': {
                            'code': code,
                            'date': date,
                            'signal': 'dead'
                        }
                    },
                              upsert=True))

            if len(update_requests) > 0:
                update_result = DB_CONN['macd'].bulk_write(update_requests,
                                                           ordered=False)
                print('Save MACD, code: %s, inserted: %4d, updated: %4d' %
                      (code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except:
            print('Error occurred: %s' % code, flush=True)
            traceback.print_exc()
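With adjust=False, pandas' ewm follows exactly the recursion used in the loops above (EMA(i) = alpha * (CLOSE(i) - EMA(i-1)) + EMA(i-1), starting from the first close), so the same columns can be computed without explicit loops. A sketch rather than a tested drop-in replacement, assuming df_daily already holds the close column indexed by date and short/long/m are the parameters defined above:

# Vectorized equivalent of the EMA/DIFF/DEA loops above.
df_daily['EMA1'] = df_daily['close'].ewm(span=short, adjust=False).mean()
df_daily['EMA2'] = df_daily['close'].ewm(span=long, adjust=False).mean()
df_daily['DIFF'] = df_daily['EMA1'] - df_daily['EMA2']
df_daily['DEA'] = df_daily['DIFF'].ewm(span=m, adjust=False).mean()
df_daily['delta'] = df_daily['DIFF'] - df_daily['DEA']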
Example #15
def pe_computing(codes=None):
    '''
    Compute each stock's PE from eps and close, and save it to MongoDB
    '''
    # eps is taken from the finance_report collection
    if codes is None:
        codes = get_all_codes()

    if isinstance(codes, list) is False:
        codes = [codes]

    for code in codes:
        # Look up the close price in the daily collection
        daily_cursor = daily_collection.find({'code': code},
                                             projection={
                                                 'close': True,
                                                 'date': True,
                                                 '_id': False
                                             })
        update_requests = []
        for daily in daily_cursor:
            date = daily['date']
            finance_eps_cursor = finance_report_collection.find_one(
                {
                    'code': code,
                    'report_date': {
                        '$regex': r'\d{4}-12-31'
                    },
                    'announced_date': {
                        '$lt': date
                    }
                },
                projection={
                    'code': True,
                    'eps': True,
                    "_id": False
                },
                sort=[('announced_date', DESCENDING)])

            if finance_eps_cursor is None:
                continue

            if date < '2008-01-01':
                print('no data in finance_report, code: %s' % code)
                finance_xiaoxiang().crawl_finance_report(code)
                break
            # Compute the PE ratio
            eps = 0
            if finance_eps_cursor['eps'] != '-':
                eps = finance_eps_cursor['eps']

            if eps != 0:
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {'$set': {
                        'pe': round(daily['close'] / eps, 4)
                    }}))
        # Write the PE values to MongoDB
        if len(update_requests) > 0:
            update_result = daily_collection.bulk_write(update_requests,
                                                        ordered=False)
            print('update pe, code: %s, insert: %s, update: %s' %
                  (code, update_result.upserted_count,
                   update_result.modified_count))
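The stored PE is simply the daily close divided by the most recent annual eps, rounded to four decimals; a made-up one-line check:

close, eps = 25.30, 1.27        # made-up close price and annual EPS
print(round(close / eps, 4))    # 19.9213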
Example #16
File: tusharePro.py  Project: xhq197/quant

def get_next_trade_day(date):  # date format '%Y-%m-%d'
    tradingdays_list = get_trade_days()
    if date in tradingdays_list:
        today_index = tradingdays_list.index(date)
        if (today_index == len(tradingdays_list) - 1):
            return -1
        return tradingdays_list[int(today_index) + 1]


def get_tushare_code(begin_date='2000-01-01', end_date=None):
    if (end_date is None):
        end_date = datetime.now().strftime('%Y-%m-%d')
    # Initialize the tushare pro API
    pro = ts.pro_api(
        'f3ef4ac4dc04104e0573aa75c29aef70f30837a416baf6cd1a0f8e81')
    tradingdays_list = get_trading_dates(begin_date=begin_date,
                                         end_date=end_date)
    # tradingdays_list = get_trade_days(begin_date= begin_date,end_date = end_date)
    codes = set()
    for day in tradingdays_list:
        data = pro.daily(trade_date=day.replace('-', ''))
        codes = codes | set(data.ts_code)
    return sorted(codes)


if __name__ == '__main__':
    # print(get_tushare_code(begin_date = '2015-01-01',end_date = None))
    print(len(get_all_codes()))
Example #17
def compute_boll(begin_date=None, end_date=None, codes=None):
    """
    计算指定日期内的信号
    :param begin_date: 开始日期
    :param end_date: 结束日期
    """
    if codes is None:
        codes = get_all_codes()
    if begin_date is None:
        begin_date = '2008-01-01'
    if end_date is None:
        end_date = datetime.now().strftime('%Y-%m-%d')
    if isinstance(codes, list) is False:
        codes = [codes]

    for code in codes:
        try:
            # Get the backward-adjusted (hfq) prices; use them to compute Boll
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': begin_date,
                        '$lte': end_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': False
                }).hint([('code', 1), ('date', -1)])

            df_daily = DataFrame([daily for daily in daily_cursor])

            # Compute MB (middle band); computed after the close, so the current day's close is used
            df_daily['MB'] = df_daily['close'].rolling(20).mean()
            # Compute STD20
            df_daily['std'] = df_daily['close'].rolling(20).std()
            # Compute UP (upper band)
            df_daily['UP'] = df_daily['MB'] + 2 * df_daily['std']
            # Compute DOWN (lower band)
            df_daily['DOWN'] = df_daily['MB'] - 2 * df_daily['std']

            # Use the date as the index
            df_daily.set_index(['date'], inplace=True)

            # Shift close by one position so it becomes the previous close at the current index
            last_close = df_daily['close'].shift(1)

            # Upper-band breakout
            shifted_up = df_daily['UP'].shift(1)
            df_daily['up_mask'] = (last_close <= shifted_up) & (
                df_daily['close'] > shifted_up)

            # Lower-band breakout
            shifted_down = df_daily['DOWN'].shift(1)
            df_daily['down_mask'] = (last_close >= shifted_down) & (
                df_daily['close'] < shifted_down)

            # Filter the result
            df_daily = df_daily[df_daily['up_mask'] | df_daily['down_mask']]
            df_daily.drop(['close', 'std', 'MB', 'UP', 'DOWN'],
                          axis=1,
                          inplace=True)

            # Save the signals to the database
            update_requests = []
            for index in df_daily.index:
                doc = {
                    'code': code,
                    'date': index,
                    # direction: upward breakout 'up', downward breakout 'down'
                    'direction':
                    'up' if df_daily.loc[index]['up_mask'] else 'down'
                }
                update_requests.append(
                    UpdateOne(doc, {'$set': doc}, upsert=True))

            if len(update_requests) > 0:
                DB_CONN['boll'].create_index([("code", 1), ("date", -1)],
                                             background=True)
                update_result = DB_CONN['boll'].bulk_write(update_requests,
                                                           ordered=False)
                print('%s, upserted: %4d, modified: %4d' %
                      (code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except:
            traceback.print_exc()
Example #18
def compute_fractal(begin_date, end_date):
    # Get all stock codes
    codes = get_all_codes()

    # Compute the signal for each stock
    for code in codes:
        try:
            # Get the backward-adjusted (hfq) prices; use them to compute the fractal signals
            daily_cursor = DB_CONN['daily_hfq'].find(
                {'code': code, 'date': {'$gte': begin_date, '$lte': end_date}, 'index': False},
                sort=[('date', ASCENDING)],
                projection={'date': True, 'high': True, 'low': True, '_id': False}
            )

            df_daily = DataFrame([daily for daily in daily_cursor])

            # Use the date as the index
            df_daily.set_index('date', inplace=True)

            # Use shift to align the two previous days and the two following days with the middle day
            df_daily_shift_1 = df_daily.shift(1)
            df_daily_shift_2 = df_daily.shift(2)
            df_daily_shift_3 = df_daily.shift(3)
            df_daily_shift_4 = df_daily.shift(4)

            # Top fractal: the middle day's high is greater than the highs of the two days before and the two days after
            df_daily['up'] = (df_daily_shift_2['high'] > df_daily['high']) & \
                             (df_daily_shift_2['high'] > df_daily_shift_1['high']) & \
                             (df_daily_shift_2['high'] > df_daily_shift_3['high']) & \
                             (df_daily_shift_2['high'] > df_daily_shift_4['high'])

            # Bottom fractal: the middle day's low is less than the lows of the two days before and the two days after
            df_daily['down'] = (df_daily_shift_2['low'] < df_daily['low']) & \
                               (df_daily_shift_2['low'] < df_daily_shift_1['low']) & \
                               (df_daily_shift_2['low'] < df_daily_shift_3['low']) & \
                               (df_daily_shift_2['low'] < df_daily_shift_4['low'])

            # Keep only the dates on which a top or bottom fractal signal appears
            df_daily = df_daily[(df_daily['up'] | df_daily['down'])]

            # Drop the columns that are no longer needed
            df_daily.drop(['high', 'low'], axis=1, inplace=True)

            print(df_daily)
            # Save the signals to the database
            update_requests = []
            # The saved document consists of code, date and the signal direction
            for date in df_daily.index:
                doc = {
                    'code': code,
                    'date': date,
                    # up: top fractal, down: bottom fractal
                    'direction': 'up' if df_daily.loc[date]['up'] else 'down'
                }

                # Save with code, date and direction as the query condition, so an index on these three fields is needed:
                # db.fractal_signal.createIndex({'code': 1, 'date': 1, 'direction': 1})
                update_requests.append(
                    UpdateOne(doc, {'$set': doc}, upsert=True))

            if len(update_requests) > 0:
                update_result = DB_CONN['fractal_signal'].bulk_write(update_requests, ordered=False)
                print('%s, upserted: %4d, modified: %4d' %
                      (code, update_result.upserted_count, update_result.modified_count),
                      flush=True)
        except:
            print('Error occurred: %s' % code, flush=True)
            traceback.print_exc()
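Because the comparisons are made against shift(2), the row that gets flagged is two trading days after the fractal's middle day, i.e. the earliest day on which the full five-day pattern is known. A toy check with made-up highs:

from pandas import DataFrame

# Made-up highs; the middle day (index 2, high=12) is a top fractal.
toy = DataFrame({'high': [10, 11, 12, 11, 10]})
s1, s2, s3, s4 = (toy.shift(k) for k in (1, 2, 3, 4))
up = (s2['high'] > toy['high']) & (s2['high'] > s1['high']) & \
     (s2['high'] > s3['high']) & (s2['high'] > s4['high'])
print(up.tolist())   # [False, False, False, False, True]: flagged two rows after the peak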
Example #19
def fill_au_factor_pre_close(begin_date=None, end_date=None, codes=None):
    """
    为daily数据集填充:
    1. 复权因子au_factor,复权的因子计算方式:au_factor = hfq_close/close
    2. pre_close = close(-1) * au_factor(-1)/au_factor
    :param begin_date: 开始日期
    :param end_date: 结束日期
    """
    if codes is None:
        all_codes = get_all_codes()
    else:
        if isinstance(codes, list) is False:
            codes = [codes]
        all_codes = codes

    if begin_date is None:
        begin_date = '2008-01-01'
    if end_date is None:
        end_date = datetime.now().strftime('%Y-%m-%d')

    for code in all_codes:
        hfq_daily_cursor = DB_CONN['daily_hfq'].find(
            {'code': code, 'date': {'$lte': end_date, '$gte': begin_date}, 'index': False},
            sort=[('date', ASCENDING)],
            projection={'date': True, 'close': True}).hint([('code', 1), ('date', -1)])

        date_hfq_close_dict = dict([(x['date'], x['close']) for x in hfq_daily_cursor])

        daily_cursor = DB_CONN['daily_none'].find(
            {'code': code, 'date': {'$lte': end_date, '$gte': begin_date}, 'index': False},
            sort=[('date', ASCENDING)],
            projection={'date': True, 'close': True}
        ).hint([('code', 1), ('date', -1)])

        last_close = -1
        last_au_factor = -1

        update_requests = []
        for daily in daily_cursor:
            date = daily['date']
            try:
                close = daily['close']

                doc = dict()

                au_factor = round(date_hfq_close_dict[date] / close, 2)
                doc['au_factor'] = au_factor
                if last_close != -1 and last_au_factor != -1:
                    pre_close = last_close * last_au_factor / au_factor
                    doc['pre_close'] = round(pre_close, 2)

                last_au_factor = au_factor
                last_close = close

                update_requests.append(
                    UpdateOne(
                        {'code': code, 'date': date, 'index': False},
                        {'$set': doc}))
            except:
                print('ERROR happen when calculate au_factor, code: %s,date: %s' % (code, date), flush=True)
                # reset to the initial values to avoid using stale data
                last_close = -1
                last_au_factor = -1

        if len(update_requests) > 0:
            update_result = DB_CONN['daily_none'].bulk_write(update_requests, ordered=False)
            print('fill au_factor and pre_close, code: %s, update: %4d, insert: %s' %
                  (code, update_result.modified_count, update_result.upserted_count), flush=True)
Example #20
def crawl_finance_report():
    # First get the list of all stock codes
    codes = get_all_codes(2)
    # Create a connection pool
    conn_pool = urllib3.PoolManager()

    # Financial-report URL to crawl; scode is the stock code
    # url = 'http://dcfm.eastmoney.com//em_mutisvcexpandinterface/api/js/get?' \
    #       'type=YJBB20_YJBB&token=70f12f2f4f091e459a279469fe49eca5&st=reportdate&sr=-1' \
    #       '&filter=(scode={0})&p={page}&ps={pageSize}&js={"pages":(tp),"data":%20(x)}'
    url = 'http://datacenter.eastmoney.com/api/data/get?callback=jQuery11230813372504046614_' \
          '1613984932139&st=REPORTDATE&sr=-1&ps={pagesize}&p={page}&sty=ALL&filter=(SECURITY_CODE%3D%22{0}%22)' \
          '&token=894050c76af8597a853f5b408b759f5d&type=RPT_LICO_FN_CPD'
    pagesize = '1000'  # at the moment the data fits on a single page
    page = '1'
    url = url.replace('{pagesize}', pagesize)
    url = url.replace('{page}', page)
    # Loop over all stocks and crawl their financial data
    for code in codes:
        # Substitute the stock code and crawl this stock's financial data
        response = conn_pool.request('GET', url.replace('{0}', code))

        # Parse the crawled result
        raw_buff = response.data.decode('UTF-8')
        buff = raw_buff[raw_buff.find('{'):raw_buff.rfind('}') + 1]
        result = json.loads(buff)

        # Extract the data
        if (result is None):
            print('reports is None = ', code)
            continue
        if ('result' in result and result['result'] is not None):

            if ('data' in result['result']
                    and result['result']['data'] is not None):
                reports = result['result']['data']
            else:
                print('result or data not in reports = ', code, '\n', result)
                continue
        else:
            print('result or data not in reports = ', code, '\n', result)
            continue

        # List of database update requests
        update_requests = []
        # Process every report record
        for report in reports:
            if (report['REPORTDATE'] is None
                    or len(report['REPORTDATE']) < 10):
                print('REPORTDATE ERROR  ', report)
                continue
            if (report['UPDATE_DATE'] is None
                    or len(report['UPDATE_DATE']) < 10):
                print('UPDATE_DATE ERROR  ', report)
                continue
            doc = {
                'code': code,
                # REPORTDATE: report period
                'report_date': report['REPORTDATE'][:10],
                # announcement date; UPDATE_DATE is the latest announcement time
                'announced_date': report['UPDATE_DATE'][:10],
                # earnings per share (CNY)
                'eps': report['BASIC_EPS'],
                # earnings per share after deductions (CNY)
                'DEDUCT_BASIC_EPS': report['DEDUCT_BASIC_EPS'],
                # operating revenue
                'TOTAL_OPERATE_INCOME': report['TOTAL_OPERATE_INCOME'],
                # net profit
                'PARENT_NETPROFIT': report['PARENT_NETPROFIT'],
                # YSTZ: year-over-year revenue growth (%)
                'income_ratio': report['YSTZ'],
                # SJLTZ: year-over-year net profit growth (%)
                'netprofit_ratio': report['SJLTZ'],
                # BPS: net assets per share (CNY)
                'BPS': report['BPS'],
                # MGJYXJJE: operating cash flow per share (CNY)
                'operating_per_share': report['MGJYXJJE'],
                # YSHZ: quarter-over-quarter revenue growth (%)
                'income_quart_ratio': report['YSHZ'],
                # SJLHZ: quarter-over-quarter net profit growth (%)
                'netprofit_quart_ratio': report['SJLHZ'],
                # PUBLISHNAME: presumably the industry classification
                'publish_name': report['PUBLISHNAME'],
                # NOTICE_DATE: first announcement date
                'notice_date': report['NOTICE_DATE'],
                # QDATE: quarter, e.g. 2017Q3
                'quart_date': report['QDATE'],
                # SECUCODE: stock code with exchange suffix, e.g. 600000SH
                'code1': report['SECUCODE'],
                # SECURITY_NAME_ABBR: short name
                'name': report['SECURITY_NAME_ABBR'],
                # TRADE_MARKET: exchange board
                'trade_market': report['TRADE_MARKET'],
                # SECURITY_TYPE: security type, e.g. A share
                'security_type': report['SECURITY_TYPE'],
                # ASSIGNDSCRPT: profit distribution plan, e.g. "10派1.80元(含税)"
                'dividend': report['ASSIGNDSCRPT']
            }

            # Add the update request to the list; the query condition is code + report_date.
            # To save data quickly, create an index: db.finance_report.createIndex({'code':1, 'report_date':1})
            update_requests.append(
                UpdateOne(
                    {
                        'code': code,
                        'report_date': doc['report_date']
                    },
                    # upsert=True inserts a new document when no match is found
                    {'$set': doc},
                    upsert=True))

        # If the update request list is not empty, write it to the database
        if len(update_requests) > 0:
            # Use bulk writes to speed up saving
            update_result = DB_CONN['finance_report'].bulk_write(
                update_requests, ordered=False)
            print('Stock %s, financial reports, updated %d, inserted %d' %
                  (code, update_result.modified_count,
                   update_result.upserted_count))
Example #21
    def crawl_finance_report(self):
        """
        从东方财富网站抓取三张财务报表
        :return:
        """
        # 先获取所有的股票列表
        codes = get_all_codes()

        # Create a connection pool
        conn_pool = urllib3.PoolManager()

        # URL to crawl, with two placeholders: {1} - report type, {2} - stock code
        url = 'http://dcfm.eastmoney.com/em_mutisvcexpandinterface/api/js/get?' \
              'type={1}&token=70f12f2f4f091e459a279469fe49eca5&' \
              'st=reportdate&sr=-1&p=1&ps=500&filter=(scode=%27{2}%27)' \
              '&js={%22pages%22:(tp),%22data%22:%20(x)}&rt=51044775#'

        user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'

        # 对应的类型,分别资产负债表、现金流量表和利润表
        report_types = ['CWBB_ZCFZB', 'CWBB_XJLLB', 'CWBB_LRB']

        for code in codes:
            for report_type in report_types:
                print('Crawling finance reports, stock: %s, report type: %s' % (code, report_type),
                      flush=True)
                response = conn_pool.request(
                    'GET',
                    url.replace('{1}', report_type).replace('{2}', code),
                    headers={'User-Agent': user_agent})

                # Parse the response body (already decoded to str)
                result = simplejson.loads(response.data.decode('UTF-8'))

                reports = result['data']

                update_requests = []
                for report in reports:
                    # Normalize a few fields before saving
                    try:
                        report.update({
                            # Keep only the year-month-day part of the
                            # announcement date and the reporting period
                            'announced_date':
                            report['noticedate'][0:10],
                            'report_date':
                            report['reportdate'][0:10],
                            # Rename the stock code and name fields to match
                            # the conventions used elsewhere in the system
                            'code':
                            code,
                            'name':
                            report['sname']
                        })

                        update_requests.append(
                            UpdateOne(
                                {
                                    'code': code,
                                    'report_date': report['report_date'],
                                    'announced_date': report['announced_date']
                                }, {'$set': report},
                                upsert=True))
                    except Exception:
                        print('Parse error, stock: %s, report type: %s' % (code, report_type))

                if len(update_requests) > 0:
                    update_result = self.db[report_type].bulk_write(
                        update_requests, ordered=False)
                    print('Stock %s, report type: %s, modified: %4d, upserted: %4d' %
                          (code, report_type, update_result.modified_count,
                           update_result.upserted_count))
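
The request URL above carries two placeholders, {1} for the report type and {2} for the stock code, which are filled with str.replace before each request. A standalone sketch of that fetch step; fetch_report_page is a hypothetical helper, and the url template, token and user_agent are copied from the example and may no longer be valid:

import urllib3
import simplejson


def fetch_report_page(url_template, report_type, code, user_agent):
    """Fill the {1}/{2} placeholders and fetch one page of report rows."""
    conn_pool = urllib3.PoolManager()
    full_url = url_template.replace('{1}', report_type).replace('{2}', code)
    response = conn_pool.request('GET', full_url,
                                 headers={'User-Agent': user_agent})
    result = simplejson.loads(response.data.decode('UTF-8'))
    # result['pages'] holds the page count, result['data'] the report rows
    return result.get('data', [])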
Example #22
def compute_rsi(begin_date, end_date):
    """
    计算指定时间段内的RSI信号,并保存到数据库中
    :param begin_date: 开始日期
    :param end_date: 结束日期
    """

    # 获取所有股票代码
    codes = get_all_codes()

    # 计算RSI
    N = 12

    # 计算所有股票的RSI信号
    for code in codes:
        try:
            # 获取后复权的价格,使用后复权的价格计算RSI
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': begin_date,
                        '$lte': end_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': False
                })

            df_daily = DataFrame([daily for daily in daily_cursor])

            # Skip the stock if there are fewer rows than the N-day window requires
            if df_daily.index.size < N:
                print('Not enough data: %s' % code, flush=True)
                continue

            # Use the date as the index
            df_daily.set_index('date', inplace=True)
            # Shift close by one day to obtain the previous close
            df_daily['pre_close'] = df_daily['close'].shift(1)
            # Daily change in percent: (close - pre_close) * 100 / pre_close
            df_daily['change_pct'] = (df_daily['close'] - df_daily['pre_close']
                                      ) * 100 / df_daily['pre_close']
            # Keep only the gains; losing days contribute zero
            df_daily['up_pct'] = DataFrame({
                'up_pct': df_daily['change_pct'],
                'zero': 0
            }).max(axis=1)

            # Compute RSI
            df_daily['RSI'] = df_daily['up_pct'].rolling(N).mean() / abs(
                df_daily['change_pct']).rolling(N).mean() * 100
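            # Note: this is the simple-moving-average (Cutler) form of RSI.
            # Writing U for the N-day average gain and D for the N-day average
            # loss, the line above computes 100 * U / (U + D), which equals the
            # textbook 100 - 100 / (1 + U/D); Wilder's original RSI smooths U
            # and D exponentially instead of with a simple rolling mean.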

            # Shift RSI by one day so crossings can be detected
            df_daily['PREV_RSI'] = df_daily['RSI'].shift(1)

            # Overbought: RSI crossing down through 80 is treated as a sell signal
            df_daily_over_bought = df_daily[(df_daily['RSI'] < 80)
                                            & (df_daily['PREV_RSI'] >= 80)]
            # Oversold: RSI crossing up through 20 is treated as a buy signal
            df_daily_over_sold = df_daily[(df_daily['RSI'] > 20)
                                          & (df_daily['PREV_RSI'] <= 20)]

            # Save the results; create an index on code and date first:
            # db.rsi.createIndex({'code': 1, 'date': 1})
            update_requests = []
            # Overbought rows: upsert by (code, date) with signal 'over_bought'
            for date in df_daily_over_bought.index:
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {
                        '$set': {
                            'code': code,
                            'date': date,
                            'signal': 'over_bought'
                        }
                    },
                              upsert=True))

            # Oversold rows: upsert by (code, date) with signal 'over_sold'
            for date in df_daily_over_sold.index:
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {
                        '$set': {
                            'code': code,
                            'date': date,
                            'signal': 'over_sold'
                        }
                    },
                              upsert=True))

            if len(update_requests) > 0:
                update_result = DB_CONN['rsi'].bulk_write(update_requests,
                                                          ordered=False)
                print('Save RSI, stock: %s, upserted: %4d, modified: %4d' %
                      (code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except Exception:
            print('Error occurred: %s' % code, flush=True)
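
A self-contained sketch of the same RSI arithmetic on made-up closing prices (pandas only, no database), useful for sanity-checking the rolling calculation above; the price series is purely illustrative:

import pandas as pd

N = 12
# Hypothetical close prices, just enough rows to fill one rolling window
close = pd.Series([10.0, 10.2, 10.1, 10.4, 10.3, 10.5, 10.8, 10.7,
                   10.9, 11.0, 10.8, 11.2, 11.5, 11.3, 11.6, 11.4])

change_pct = (close - close.shift(1)) * 100 / close.shift(1)
up_pct = change_pct.clip(lower=0)            # losing days contribute zero
rsi = up_pct.rolling(N).mean() / change_pct.abs().rolling(N).mean() * 100

prev_rsi = rsi.shift(1)
sell = (rsi < 80) & (prev_rsi >= 80)         # crossing down through 80
buy = (rsi > 20) & (prev_rsi <= 20)          # crossing up through 20
print(pd.DataFrame({'RSI': rsi, 'sell': sell, 'buy': buy}).tail())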
Example #23
def compute_fractal(begin_date=None, end_date=None, codes=None):
    """
    计算指定日期内的信号
    :param begin_date: 开始日期
    :param end_date: 结束日期
    """
    if codes is None:
        codes = get_all_codes()
    if begin_date is None:
        begin_date = '2008-01-01'
    if end_date is None:
        end_date = datetime.now().strftime('%Y-%m-%d')
    if not isinstance(codes, list):
        codes = [codes]

    for code in codes:
        try:
            # Use the backward-adjusted (hfq) prices to compute fractals
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': begin_date,
                        '$lte': end_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'high': True,
                    'low': True,
                    '_id': False
                }).hint([('code', 1), ('date', -1)])

            df_daily = DataFrame([daily for daily in daily_cursor])

            df_daily.set_index('date', inplace=True)

            df_daily_left_shift_1 = df_daily.shift(-1)
            df_daily_left_shift_2 = df_daily.shift(-2)
            df_daily_right_shift_1 = df_daily.shift(1)
            df_daily_right_shift_2 = df_daily.shift(2)

            # Top fractal: the high is strictly higher than the highs of the
            # two bars before it and the two bars after it
            df_daily['up'] = (df_daily['high'] > df_daily_left_shift_1['high']) & \
                             (df_daily['high'] > df_daily_left_shift_2['high']) & \
                             (df_daily['high'] > df_daily_right_shift_1['high']) & \
                             (df_daily['high'] > df_daily_right_shift_2['high'])

            # Bottom fractal: the low is strictly lower than the lows of the
            # two bars before it and the two bars after it
            df_daily['down'] = (df_daily['low'] < df_daily_left_shift_1['low']) & \
                               (df_daily['low'] < df_daily_left_shift_2['low']) & \
                               (df_daily['low'] < df_daily_right_shift_1['low']) & \
                               (df_daily['low'] < df_daily_right_shift_2['low'])

            # Keep only the rows that form a fractal in either direction
            df_daily = df_daily[(df_daily['up'] | df_daily['down'])]

            # Drop the price columns; only the signal direction needs to be saved
            df_daily.drop(['high', 'low'], axis=1, inplace=True)

            print(df_daily)
            # Save the signals to the database
            update_requests = []
            for index in df_daily.index:
                doc = {
                    'code': code,
                    'date': index,
                    # direction: 'up' for a top fractal, 'down' for a bottom fractal
                    'direction': 'up' if df_daily.loc[index]['up'] else 'down'
                }
                update_requests.append(
                    UpdateOne(doc, {'$set': doc}, upsert=True))

            if len(update_requests) > 0:
                DB_CONN['fractal_signal'].create_index([("code", 1),
                                                        ("date", -1)],
                                                       background=True)
                update_result = DB_CONN['fractal_signal'].bulk_write(
                    update_requests, ordered=False)
                print('%s, upserted: %4d, modified: %4d' %
                      (code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except Exception:
            print('Error occurred: %s' % code, flush=True)
            traceback.print_exc()
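
A usage sketch, assuming the database connection and helper functions from the surrounding examples are available; the stock code and dates are illustrative only:

# Fractal signals for one (illustrative) code over one year; a plain string is
# accepted because compute_fractal wraps it in a list
compute_fractal(begin_date='2020-01-01', end_date='2020-12-31', codes='600000')

# Or run it for every stock with the default range (2008-01-01 through today)
compute_fractal()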