def fill_au_factor_pre_close(begin_date, end_date):
    """
    Fill two derived fields into the `daily` collection:
    1. adjust factor:  au_factor = hfq_close / close
    2. pre_close = close(-1) * au_factor(-1) / au_factor
    :param begin_date: start date (inclusive)
    :param end_date: end date (inclusive)
    """
    all_codes = get_all_codes()

    for code in all_codes:
        # Backward-adjusted (hfq) closes for the period, keyed by date.
        hfq_daily_cursor = DB_CONN['daily_hfq'].find(
            {'code': code, 'date': {'$lte': end_date, '$gte': begin_date}, 'index': False},
            sort=[('date', ASCENDING)],
            projection={'date': True, 'close': True})
        date_hfq_close_dict = {x['date']: x['close'] for x in hfq_daily_cursor}

        daily_cursor = DB_CONN['daily'].find(
            {'code': code, 'date': {'$lte': end_date, '$gte': begin_date}, 'index': False},
            sort=[('date', ASCENDING)],
            projection={'date': True, 'close': True}
        )

        # -1 marks "no previous trading day processed yet".
        last_close = -1
        last_au_factor = -1

        update_requests = []
        for daily in daily_cursor:
            date = daily['date']
            try:
                close = daily['close']
                doc = dict()
                # adjust factor = hfq close of the day / raw close of the day
                au_factor = round(date_hfq_close_dict[date] / close, 2)
                doc['au_factor'] = au_factor

                # pre_close = previous raw close * previous factor / current factor
                if last_close != -1 and last_au_factor != -1:
                    pre_close = last_close * last_au_factor / au_factor
                    doc['pre_close'] = round(pre_close, 2)

                last_au_factor = au_factor
                last_close = close
                update_requests.append(
                    UpdateOne(
                        {'code': code, 'date': date, 'index': False},
                        {'$set': doc}))
            except (KeyError, TypeError, ZeroDivisionError):
                # Expected failures only: missing hfq row (KeyError), missing
                # close (TypeError), close == 0 (ZeroDivisionError).  The
                # original bare `except:` also swallowed KeyboardInterrupt.
                print('计算复权因子时发生错误,股票代码:%s,日期:%s' % (code, date), flush=True)
                # Reset the carried state so a bad day does not corrupt the
                # next day's pre_close.
                last_close = -1
                last_au_factor = -1

        if len(update_requests) > 0:
            update_result = DB_CONN['daily'].bulk_write(update_requests, ordered=False)
            print('填充复权因子和前收,股票:%s,更新:%4d条' %
                  (code, update_result.modified_count), flush=True)
def compute_pe():
    """
    Compute the PE ratio for every stock and save it into the daily docs.

    PE = close / eps, where eps comes from the latest annual report whose
    announcement date is on or before the quote date (avoids lookahead bias).
    """
    codes = get_all_codes()

    for code in codes:
        print('计算市盈率, %s' % code)
        daily_cursor = daily_collection.find({
            'code': code,
            'index': False
        }, projection={
            'close': True,
            'date': True
        })

        update_requests = []
        for daily in daily_cursor:
            _date = daily['date']
            # Latest annual report (report_date = XXXX-12-31) announced on or
            # before the quote date; querying by announced_date prevents using
            # data that was not yet public.
            # FIX: raw string for the regex -- '\d' in a plain string is an
            # invalid escape sequence (DeprecationWarning, error in future
            # Python versions); the runtime value is unchanged.
            finance_report = finance_report_collection.find_one(
                {
                    'code': code,
                    'report_date': {
                        '$regex': r'\d{4}-12-31'
                    },
                    'announced_date': {
                        '$lte': _date
                    }
                },
                sort=[('announced_date', DESCENDING)])

            if finance_report is None:
                continue

            # '-' means EPS was not disclosed; treat as unavailable.
            eps = 0
            if finance_report['eps'] != '-':
                eps = finance_report['eps']

            # Skip zero/missing EPS to avoid division by zero.
            if eps != 0:
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': _date,
                        'index': False
                    }, {'$set': {
                        'pe': round(daily['close'] / eps, 4)
                    }}))

        if len(update_requests) > 0:
            update_result = daily_collection.bulk_write(update_requests,
                                                        ordered=False)
            print('更新PE, %s, 更新:%d' % (code, update_result.modified_count))
def crawl_finance_report():
    """
    Crawl report data (report date, announcement date, EPS) for all stocks
    from the Eastmoney API and upsert it into the `finance_report` collection.
    """
    # All stock codes to crawl.
    codes = get_all_codes()

    # Shared HTTP connection pool.
    conn_pool = urllib3.PoolManager()

    # Report API; scode is the stock-code placeholder ({0}).
    url = 'http://dcfm.eastmoney.com//em_mutisvcexpandinterface/api/js/get?' \
          'type=YJBB20_YJBB&token=70f12f2f4f091e459a279469fe49eca5&st=reportdate&sr=-1' \
          '&filter=(scode={0})&p={page}&ps={pageSize}&js={"pages":(tp),"data":%20(x)}'

    for code in codes:
        # Substitute the stock code and fetch this stock's reports.
        response = conn_pool.request('GET', url.replace('{0}', code))

        # Parse the JSON payload.
        result = json.loads(response.data.decode('UTF-8'))

        reports = result['data']
        # ROBUSTNESS FIX: the API may return no data for a code; iterating
        # None would raise TypeError, so skip to the next code instead.
        if not reports:
            continue

        update_requests = []

        for report in reports:
            doc = {
                # report period, e.g. 2017-12-31
                'report_date': report['reportdate'][0:10],
                # announcement date (when the report became public)
                'announced_date': report['latestnoticedate'][0:10],
                # earnings per share
                'eps': report['basiceps'],
                'code': code
            }

            # Upsert keyed by (code, report_date); for fast saves create an
            # index: db.finance_report.createIndex({'code':1, 'report_date':1})
            update_requests.append(
                UpdateOne(
                    {
                        'code': code,
                        'report_date': doc['report_date']
                    },
                    # upsert=True inserts a new doc when nothing matches
                    {'$set': doc},
                    upsert=True))

        if len(update_requests) > 0:
            # Bulk, unordered write: faster, and one failure does not stop
            # the remaining operations.
            update_result = DB_CONN['finance_report'].bulk_write(
                update_requests, ordered=False)
            print('股票 %s, 财报,更新 %d, 插入 %d' %
                  (code, update_result.modified_count,
                   update_result.upserted_count))
def compute_fractal(begin_date, end_date):
    """
    Detect fractal signals (5-bar local highs/lows) between begin_date and
    end_date and upsert them into the `fractal_signal` collection.

    A bar is an "up" fractal when its high exceeds the highs of the two bars
    before and the two bars after it; a "down" fractal symmetrically on lows.
    :param begin_date: start date
    :param end_date: end date
    """
    codes = get_all_codes()

    for code in codes:
        try:
            # Backward-adjusted prices, so corporate actions do not create
            # spurious highs/lows.
            daily_cursor = DB_CONN['daily_hfq'].find(
                {'code': code, 'date': {'$gte': begin_date, '$lte': end_date}, 'index': False},
                sort=[('date', ASCENDING)],
                projection={'date': True, 'high': True, 'low': True, '_id': False}
            )
            df_daily = DataFrame([daily for daily in daily_cursor])
            # FIX: keyword args instead of positional `1` -- positional
            # drop/axis arguments were deprecated in pandas 1.x and removed
            # in pandas 2.0.
            df_daily.set_index(['date'], drop=True, inplace=True)

            # shift(-k) aligns the bar k rows ahead with the current row;
            # shift(k) aligns the bar k rows back.
            df_daily_left_shift_1 = df_daily.shift(-1)
            df_daily_left_shift_2 = df_daily.shift(-2)
            df_daily_right_shift_1 = df_daily.shift(1)
            df_daily_right_shift_2 = df_daily.shift(2)

            # Up fractal: current high strictly above both following and both
            # preceding highs.
            df_daily['up'] = (df_daily['high'] > df_daily_left_shift_1['high']) & \
                             (df_daily['high'] > df_daily_left_shift_2['high']) & \
                             (df_daily['high'] > df_daily_right_shift_1['high']) & \
                             (df_daily['high'] > df_daily_right_shift_2['high'])

            # Down fractal: current low strictly below both following and both
            # preceding lows.
            df_daily['down'] = (df_daily['low'] < df_daily_left_shift_1['low']) & \
                               (df_daily['low'] < df_daily_left_shift_2['low']) & \
                               (df_daily['low'] < df_daily_right_shift_1['low']) & \
                               (df_daily['low'] < df_daily_right_shift_2['low'])

            # Keep only rows that fired either signal.
            df_daily = df_daily[(df_daily['up'] | df_daily['down'])]

            # Drop the price columns before persisting.
            df_daily.drop(['high', 'low'], axis=1, inplace=True)
            print(df_daily)

            update_requests = []
            for index in df_daily.index:
                doc = {
                    'code': code,
                    'date': index,
                    # direction: 'up' = top fractal, 'down' = bottom fractal
                    'direction': 'up' if df_daily.loc[index]['up'] else 'down'
                }
                update_requests.append(
                    UpdateOne(doc, {'$set': doc}, upsert=True))

            if len(update_requests) > 0:
                update_result = DB_CONN['fractal_signal'].bulk_write(update_requests, ordered=False)
                print('%s, upserted: %4d, modified: %4d' %
                      (code, update_result.upserted_count, update_result.modified_count),
                      flush=True)
        except Exception:
            # Log and continue with the next code instead of aborting the
            # whole batch (bare `except:` narrowed to Exception).
            print('错误发生: %s' % code, flush=True)
            traceback.print_exc()
def crawl_finance_report():
    """
    Debug/experimental crawler: fetch the Eastmoney report API for every stock
    with a hard-coded browser Cookie and User-Agent, and print the raw parsed
    JSON.  Nothing is persisted.
    """
    # All stock codes to crawl.
    codes = get_all_codes()
    # Shared HTTP connection pool.
    conn_pool = urllib3.PoolManager()

    # Report API; scode is the stock-code placeholder ({0}).
    url = 'http://dcfm.eastmoney.com//em_mutisvcexpandinterface/api/js/get?' \
          'type=YJBB20_YJBB&token=70f12f2f4f091e459a279469fe49eca5&st=reportdate&sr=-1' \
          '&filter=(scode={0})&p={page}&ps={pageSize}&js={"pages":(tp),"data":%20(x)}'

    # NOTE(review): hard-coded personal session cookie captured from a browser.
    # It will expire and it leaks account identifiers into source control --
    # consider loading it from config/env instead.
    cookie = 'emstat_bc_emcount=21446959091031597218; pgv_pvi=8471522926; st_pvi=95785429701209; _' \
             'ga=GA1.2.700565749.1496634081; Hm_lvt_557fb74c38569c2da66471446bbaea3f=1499912514; _' \
             'qddaz=QD.g2d11t.ydltyz.j61eq2em; ct=YTJNd7eYzkV_0WPJBmEs-FB0AGfyz7Z9G-Z1' \
             'HbsPTxwV9TxpuvcB2fM1xoG5PhqgTI5KlrQZKFZReg3g3ltIwo8fMyzHhEzVjltYwjAigMTdZvdEHnU7QW2' \
             'O-7u0dCkmtsFOBI4vbW1ELaZ9iUS9qPFAtIkL9M8GJTj8liRUgJY; ut=FobyicMgeV4t8TZ4Md7eLYClhCqi0w' \
             'XPSu3ZyZ4h4Q8vWCyLMuChP80vhfidM2802fUv5AJEgl9ddudfTRqObGqQ47QN4oJS5hoWxdsHCY6lvJEeXDTNKWsdP' \
             'hsfzg0i-ukMlT11XfPMIsBG9DzhW3xDAR3flNcqE5csB2rT3cfVPchlihFWHk-f3F1-lSsBjduc9_Ws_jjJEsi46' \
             'xEai2mCVGd_O41yhPU3MWXl2_2QJU_ILgnzruwDvjeoQRtf8COKmiJCtE6hhy04RvSjmbzBVeZXqUhd; pi=42660' \
             '45025913572%3bb4266045025913572%3b%e8%82%a1%e5%8f%8bZTLUIt%3bo97rhoY6b5AbF5jETm3t72EC9RGp' \
             'IhrLsDj7myRgKyWSJmYrdl1WGaA9dMGpydaY4AptuI0ZgKDj6PCir1z%2bY1if6G0iITYI4Rv%2bPXy6H%2f4u7Rg' \
             'iD%2f2hCYAGnfitkw9HQXnqBETzflfUGnvGJysWiVyPlOp%2fZh4Hfe6NqssBxCqJUrGOCM06F7feAXC6Vapy%2fse' \
             '0PT2a%3bVMsSChhqtxvtvecfLmv9FInLBANRLHpns2d%2bJGh272rIXhkWm%2bNK%2bXxkRKL2a0EgScqdtlcYN1QC' \
             'hVUWT7gmrH9py08FBPk2n5EQA9m9Zt5o2m%2bMuQhON2f66vlq%2bGk3Z66s%2brgCQhSPqoUPxluzSwBk7I9NNA%3d' \
             '%3d; uidal=4266045025913572%e8%82%a1%e5%8f%8bZTLUIt; vtpst=|; em_hq_fls=old; emstat_ss_emco' \
             'unt=5_1505917025_902015979; st_si=83202211429810; em-quote-version=topspeed; showpr3guide=1; ' \
             'qgqp_b_id=367cbd71ad5c205f172815cdab571db9; hvlist=a-000858-2~a-000651-2~a-600000-1~a-300017-2' \
             '~a-600020-1~a-600005-1~a-600004-1~a-162605-2~a-159901-2~a-600015-1~a-002364-2~a-600128-1~a-0023' \
             '57-2~a-002363-2~a-601106-1; ' \
             'HAList=a-sz-300059-%u4E1C%u65B9%u8D22%u5BCC%2Ca-sz-002607-%u4E9A%u590' \
             'F%u6C7D%u8F66%2Ca-sh-603259-%u836F%u660E%u5EB7%u5FB7%2Ca-sz-000858-%u4E94%u7CAE%u6DB2%2Ca-sh-600165' \
             '-%u65B0%u65E5%u6052%u529B%2Ca-sh-603013-%u4E9A%u666E%u80A1%u4EFD%2Ca-sz-002841-%u89C6%u6E90%u80A1%u4' \
             'EFD%2Cf-0-399300-%u6CAA%u6DF1300%2Cf-0-000300-%u6CAA%u6DF1300%2Ca-sz-000651-%u683C%u529B%u7535%u5668%' \
             '2Ca-sz-000735-%u7F57%u725B%u5C71'

    # Browser User-Agent sent with every request.
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) ' \
                 'Chrome/66.0.3359.139 Safari/537.36'

    for code in codes:
        # Substitute the stock code and fetch with the captured headers.
        response = conn_pool.request('GET', url.replace('{0}', code),
                                     headers={
                                         'Cookie': cookie,
                                         'User-Agent': user_agent})

        # Parse and dump the raw result.
        result = json.loads(response.data.decode('UTF-8'))
        print(result)
def crawl_basic():
    """
    Crawl the basic information of every stock (via baostock), one code at a
    time, logging in once for the whole batch.
    """
    # All stock codes.  FIX: the original comment incorrectly said "all
    # trading days in the given date range".
    all_codes = get_all_codes()

    # One baostock session for the whole batch.
    lg = bs.login()

    for code in all_codes:
        try:
            # Crawl one stock's basic info.
            crawl_basic_at_code(code)
        except Exception:
            # Best-effort: log and continue with the next code (bare
            # `except:` narrowed to Exception so Ctrl-C still works).
            print('抓取股票基本信息时出错,代码:%s' % code, flush=True)

    bs.logout()
def threads_cal_pe(codes=None):
    '''
    Build (but do not start) one worker thread per stock code, each of which
    will run pe_computing(code).

    Usage (in main):
        codes, threads = DailyCrawler().threads_get_stocks()
        for i in range(len(codes)):
            threads[i].start()
        for i in range(len(codes)):
            threads[i].join()
    '''
    # Default to the full code universe when no codes were supplied.
    if codes is None:
        codes = get_all_codes()

    # One thread per code.
    threads = [threading.Thread(target=pe_computing, args=(code, ))
               for code in codes]

    return codes, threads
def macd_compute(begin_date=None, end_date=None, codes=None):
    """
    Compute MACD gold/dead cross signals and upsert them into the `macd`
    collection.
    :param begin_date: start date, defaults to '2008-01-01'
    :param end_date: end date, defaults to today
    :param codes: a single code, a list of codes, or None for all codes
    """
    if codes is None:
        codes = get_all_codes()
    if begin_date is None:
        begin_date = '2008-01-01'
    if end_date is None:
        end_date = datetime.now().strftime('%Y-%m-%d')
    # Allow passing a single code without wrapping it in a list.
    if isinstance(codes, list) is False:
        codes = [codes]

    for code in codes:
        try:
            daily_hfq_cursor = daily_hfq_collection.find(
                {
                    'code': code,
                    'date': {
                        '$lte': end_date,
                        '$gte': begin_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': False
                },
            ).hint([('code', 1), ('date', -1)])

            df_daily = DataFrame(daily for daily in daily_hfq_cursor)
            df_daily.set_index(['date'], inplace=True)

            # Recursive EMA: EMA(i) = alpha * (close(i) - EMA(i-1)) + EMA(i-1)
            # with alpha = 2 / (N + 1); the series is seeded with the first close.
            EMA1 = []
            EMA2 = []
            N1 = 12
            N2 = 26
            index = 0
            for date in df_daily.index:
                if index == 0:
                    EMA1.append(df_daily.loc[date]['close'])
                    EMA2.append(df_daily.loc[date]['close'])
                else:
                    EMA1.append(2 / (N1 + 1) *
                                (df_daily.loc[date]['close'] - EMA1[-1]) +
                                EMA1[-1])
                    EMA2.append(2 / (N2 + 1) *
                                (df_daily.loc[date]['close'] - EMA2[-1]) +
                                EMA2[-1])
                index += 1
            df_daily['EMA1'] = EMA1
            df_daily['EMA2'] = EMA2
            # DIFF = fast EMA - slow EMA
            df_daily['DIFF'] = df_daily['EMA1'] - df_daily['EMA2']

            # DEA = EMA(DIFF, 9)
            index = 0
            DEA = []
            M = 9
            for date in df_daily.index:
                if index == 0:
                    DEA.append(df_daily.loc[date]['DIFF'])
                else:
                    DEA.append(2 / (M + 1) *
                               (df_daily.loc[date]['DIFF'] - DEA[-1]) +
                               DEA[-1])
                index += 1
            df_daily['DEA'] = DEA

            df_daily['delta'] = df_daily['DIFF'] - df_daily['DEA']
            df_daily['pre_delta'] = df_daily['delta'].shift(1)
            # Gold cross: DIFF crosses above DEA (delta turns positive).
            df_daily_gold = df_daily[(df_daily['delta'] > 0)
                                     & (df_daily['pre_delta'] <= 0)]
            # Dead cross: DIFF crosses below DEA (delta turns negative).
            df_daily_dead = df_daily[(df_daily['delta'] < 0)
                                     & (df_daily['pre_delta'] >= 0)]

            # (renamed from the original typo `updata_requests`)
            update_requests = []
            for date in df_daily_gold.index:
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {
                        '$set': {
                            'code': code,
                            'date': date,
                            'signal': 'gold'
                        }
                    },
                              upsert=True))
            for date in df_daily_dead.index:
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {
                        '$set': {
                            'code': code,
                            'date': date,
                            'signal': 'dead'
                        }
                    },
                              upsert=True))

            if len(update_requests) > 0:
                DB_CONN['macd'].create_index([('code', 1), ('date', -1)],
                                             background=True)
                requests_result = DB_CONN['macd'].bulk_write(update_requests,
                                                             ordered=False)
                # BUG FIX: the update/insert counts were swapped in the
                # original print -- `update` is modified_count and `insert`
                # is upserted_count.  (`date` is the last date iterated above.)
                print(
                    'Save MACD data, code: %s, date: %s, update: %4d, insert: %4d'
                    % (code, date, requests_result.modified_count,
                       requests_result.upserted_count),
                    flush=True)
        except Exception:
            print('ERROR! code: %s' % code)
            traceback.print_exc()
def compute(begin_date, end_date):
    """
    Compute Bollinger-band breakout signals (break above the upper band /
    below the lower band) in the given period and save them to the `boll`
    collection for later queries.
    :param begin_date: start date
    :param end_date: end date
    """
    all_codes = get_all_codes()

    for code in all_codes:
        try:
            # Backward-adjusted prices so the bands are not distorted by
            # splits/dividends.
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': begin_date,
                        '$lte': end_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': False
                })

            df_daily = DataFrame([daily for daily in daily_cursor])

            # Middle band: 20-day simple moving average of close.
            df_daily['MB'] = df_daily['close'].rolling(20).mean()
            # 20-day rolling standard deviation.
            df_daily['std'] = df_daily['close'].rolling(20).std()
            print(df_daily, flush=True)
            # Upper band = MB + 2 * std
            df_daily['UP'] = df_daily['MB'] + 2 * df_daily['std']
            # Lower band = MB - 2 * std
            df_daily['DOWN'] = df_daily['MB'] - 2 * df_daily['std']
            print(df_daily, flush=True)

            # Use date as the index.
            df_daily.set_index(['date'], inplace=True)

            # Previous close aligned onto the current row.
            last_close = df_daily['close'].shift(1)

            # Previous upper band aligned onto the current row.
            shifted_up = df_daily['UP'].shift(1)
            # Upward breakout: yesterday's close at/below yesterday's upper
            # band, today's close above it.
            df_daily['up_mask'] = (last_close <= shifted_up) & (
                df_daily['close'] > shifted_up)

            # Previous lower band aligned onto the current row.
            shifted_down = df_daily['DOWN'].shift(1)
            # Downward breakout: yesterday's close at/above yesterday's lower
            # band, today's close below it.
            df_daily['down_mask'] = (last_close >= shifted_down) & (
                df_daily['close'] < shifted_down)

            # Keep only rows with an up OR down breakout (the original
            # comment said "up or up" -- typo).
            df_daily = df_daily[df_daily['up_mask'] | df_daily['down_mask']]

            # FIX: keyword axis=1 -- the positional axis argument was removed
            # in pandas 2.0.
            df_daily.drop(['close', 'std', 'MB', 'UP', 'DOWN'], axis=1,
                          inplace=True)

            update_requests = []
            # The DataFrame index is the date.
            for date in df_daily.index:
                # Persist code, date and signal direction.
                doc = {
                    'code': code,
                    'date': date,
                    # direction: 'up' = upward breakout, 'down' = downward
                    'direction': 'up' if df_daily.loc[date]['up_mask'] else 'down'
                }
                update_requests.append(
                    UpdateOne(doc, {'$set': doc}, upsert=True))

            if len(update_requests) > 0:
                # Bulk, unordered write into the boll collection.
                update_result = DB_CONN['boll'].bulk_write(update_requests,
                                                           ordered=False)
                print('%s, upserted: %4d, modified: %4d' %
                      (code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except Exception:
            traceback.print_exc()
def compute_rsi(begin_date, end_date): codes = get_all_codes() # 计算RSI N = 12 for code in codes: try: # 获取后复权的价格,使用后复权的价格计算MACD daily_cursor = DB_CONN['daily_hfq'].find( { 'code': code, 'date': { '$gte': begin_date, '$lte': end_date }, 'index': False }, sort=[('date', ASCENDING)], projection={ 'date': True, 'close': True, '_id': False }) df_daily = DataFrame([daily for daily in daily_cursor]) df_daily.set_index(['date'], 1, inplace=True) df_daily['pre_close'] = df_daily['close'].shift(1) df_daily['change_pct'] = (df_daily['close'] - df_daily['pre_close'] ) * 100 / df_daily['pre_close'] # 保留上涨的日期 df_daily['up_pct'] = DataFrame({ 'up_pct': df_daily['change_pct'], 'zero': 0 }).max(1) # 计算RSI df_daily['RSI'] = df_daily['up_pct'].rolling(N).mean() / abs( df_daily['change_pct']).rolling(N).mean() * 100 df_daily.plot(kind='line', title='RSI', y=['RSI']) plt.show() # 移位 # df_daily['PREV_RSI'] = df_daily['RSI'].shift(1) # # # # # 超买,RSI下穿80 # df_daily_gold = df_daily[(df_daily['RSI'] < 80) & (df_daily['PREV_RSI'] >= 80)] # # 超卖,RSI上穿20 # df_daily_dead = df_daily[(df_daily['RSI'] > 20) & (df_daily['PREV_RSI'] <= 20)] # # # 保存结果到数据库 # update_requests = [] # for date in df_daily_gold.index: # update_requests.append(UpdateOne( # {'code': code, 'date': date}, # {'$set': {'code':code, 'date': date, 'signal': 'gold'}}, # upsert=True)) # # for date in df_daily_dead.index: # update_requests.append(UpdateOne( # {'code': code, 'date': date}, # {'$set': {'code':code, 'date': date, 'signal': 'dead'}}, # upsert=True)) # # if len(update_requests) > 0: # update_result = DB_CONN['rsi'].bulk_write(update_requests, ordered=False) # print('Save RSI, 股票代码:%s, 插入:%4d, 更新:%4d' % # (code, update_result.upserted_count, update_result.modified_count), flush=True) except: print('错误发生: %s' % code, flush=True) traceback.print_exc()
def compute_macd(begin_date, end_date):
    """
    Detect MACD gold-cross and dead-cross signals in [begin_date, end_date]
    and save them to the `macd` collection.
    :param begin_date: start date
    :param end_date: end date
    """
    codes = get_all_codes()

    # fast EMA period
    short = 12
    # slow EMA period
    long = 26

    for code in codes:
        try:
            # Backward-adjusted prices for MACD.
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': begin_date,
                        '$lte': end_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': False
                })

            df_daily = DataFrame([daily for daily in daily_cursor])
            # FIX: keyword drop=True -- the positional argument was removed in
            # pandas 2.0.
            df_daily.set_index(['date'], drop=True, inplace=True)

            # Recursive EMA seeded with the first close:
            # EMA(i) = 2/(N+1) * (close(i) - EMA(i-1)) + EMA(i-1)
            index = 0
            EMA1 = []
            EMA2 = []
            for date in df_daily.index:
                if index == 0:
                    # first day: seed both EMAs with the close
                    EMA1.append(df_daily.loc[date]['close'])
                    EMA2.append(df_daily.loc[date]['close'])
                else:
                    EMA1.append(
                        2 / (short + 1) *
                        (df_daily.loc[date]['close'] - EMA1[index - 1]) +
                        EMA1[index - 1])
                    EMA2.append(
                        2 / (long + 1) *
                        (df_daily.loc[date]['close'] - EMA2[index - 1]) +
                        EMA2[index - 1])
                index += 1
            df_daily['EMA1'] = EMA1
            df_daily['EMA2'] = EMA2

            # DIFF = fast EMA - slow EMA
            df_daily['DIFF'] = df_daily['EMA1'] - df_daily['EMA2']

            # DEA = EMA(DIFF, m)
            m = 9
            index = 0
            DEA = []
            for date in df_daily.index:
                if index == 0:
                    DEA.append(df_daily.loc[date]['DIFF'])
                else:
                    DEA.append(2 / (m + 1) *
                               (df_daily.loc[date]['DIFF'] - DEA[index - 1]) +
                               DEA[index - 1])
                index += 1
            df_daily['DEA'] = DEA

            df_daily['delta'] = df_daily['DIFF'] - df_daily['DEA']
            df_daily['pre_delta'] = df_daily['delta'].shift(1)

            # BUG FIX: the original had gold and dead swapped.  A gold cross
            # is DIFF crossing ABOVE DEA (delta turns positive) ...
            df_daily_gold = df_daily[(df_daily['pre_delta'] <= 0)
                                     & (df_daily['delta'] > 0)]
            # ... and a dead cross is DIFF crossing BELOW DEA.
            df_daily_dead = df_daily[(df_daily['pre_delta'] >= 0)
                                     & (df_daily['delta'] < 0)]

            update_requests = []
            for date in df_daily_gold.index:
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {
                        '$set': {
                            'code': code,
                            'date': date,
                            'signal': 'gold'
                        }
                    },
                              upsert=True))

            for date in df_daily_dead.index:
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {
                        '$set': {
                            'code': code,
                            'date': date,
                            'signal': 'dead'
                        }
                    },
                              upsert=True))

            if len(update_requests) > 0:
                update_result = DB_CONN['macd'].bulk_write(update_requests,
                                                           ordered=False)
                print('Save MACD, 股票代码:%s, 插入:%4d, 更新:%4d' %
                      (code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except Exception:
            print('错误发生: %s' % code, flush=True)
            traceback.print_exc()
def compute(begin_date, end_date):
    """
    Compute Bollinger-band values (MB/UP/DOWN) for the given period and print
    them.  The breakout-signal detection and persistence code below is
    currently commented out, so this version only computes and prints.
    :param begin_date: start date
    :param end_date: end date
    """
    all_codes = get_all_codes()
    # NOTE(review): debug leftover -- this overrides the full code list with a
    # single stock, making the get_all_codes() call above pointless.  Remove
    # once debugging is done (or confirm it is intentional).
    all_codes = ['000651']

    for code in all_codes:
        try:
            # Backward-adjusted prices for the bands.
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': begin_date,
                        '$lte': end_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': False
                })

            df_daily = DataFrame([daily for daily in daily_cursor])

            # Middle band: 20-day SMA of close.
            df_daily['MB'] = df_daily['close'].rolling(20).mean()
            # 20-day rolling standard deviation.
            df_daily['std'] = df_daily['close'].rolling(20).std()
            print(df_daily, flush=True)
            # Upper band = MB + 2 * std
            df_daily['UP'] = df_daily['MB'] + 2 * df_daily['std']
            # Lower band = MB - 2 * std
            df_daily['DOWN'] = df_daily['MB'] - 2 * df_daily['std']
            print(df_daily, flush=True)
            # # use date as the index
            # df_daily.set_index(['date'], inplace=True)
            #
            # # previous close aligned onto the current row
            # last_close = df_daily['close'].shift(1)
            #
            # # break above the upper band
            # shifted_up = df_daily['UP'].shift(1)
            # df_daily['up_mask'] = (last_close <= shifted_up) & (df_daily['close'] > shifted_up)
            #
            # # break below the lower band
            # shifted_down = df_daily['DOWN'].shift(1)
            # df_daily['down_mask'] = (last_close >= shifted_down) & (df_daily['close'] < shifted_down)
            #
            # # filter the results
            # df_daily = df_daily[df_daily['up_mask'] | df_daily['down_mask']]
            # df_daily.drop(['close', 'std', 'MB', 'UP', 'DOWN'], 1, inplace=True)
            #
            # # save the signals to the database
            # update_requests = []
            # for index in df_daily.index:
            #     doc = {
            #         'code': code,
            #         'date': index,
            #         # direction: 'up' = upward breakout, 'down' = downward
            #         'direction': 'up' if df_daily.loc[index]['up_mask'] else 'down'
            #     }
            #     update_requests.append(
            #         UpdateOne(doc, {'$set': doc}, upsert=True))
            #
            # if len(update_requests) > 0:
            #     update_result = DB_CONN['boll'].bulk_write(update_requests, ordered=False)
            #     print('%s, upserted: %4d, modified: %4d' %
            #           (code, update_result.upserted_count, update_result.modified_count),
            #           flush=True)
        except:
            traceback.print_exc()
def compute_ris(begin_date=None, end_date=None, codes=None):
    '''
    Compute the 12-day RSI for each code and upsert the values into the RSI
    collection in MongoDB.  (The function name keeps the historical "ris"
    typo so existing callers continue to work.)
    :param begin_date: start date, defaults to '2008-01-01'
    :param end_date: end date, defaults to today
    :param codes: a single code, a list of codes, or None for all codes
    '''
    if codes is None:
        codes = get_all_codes()
    if begin_date is None:
        begin_date = '2008-01-01'
    if end_date is None:
        end_date = datetime.now().strftime('%Y-%m-%d')
    # Allow passing a single code without wrapping it in a list.
    if isinstance(codes, list) is False:
        codes = [codes]

    # RSI lookback period.
    N = 12
    for code in codes:
        try:
            # Backward-adjusted prices for RSI.
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': begin_date,
                        '$lte': end_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': False
                }).hint([('code', 1), ('date', -1)])

            df_daily = DataFrame([daily for daily in daily_cursor])

            # Not enough rows to fill one rolling window -> nothing to save.
            if df_daily.index.size < N:
                print('data is not enough: %s' % code, flush=True)
                continue

            # FIX: keyword drop=True (positional arg removed in pandas 2.0).
            df_daily.set_index(['date'], drop=True, inplace=True)

            # Daily percentage change relative to the previous close.
            df_daily['pre_close'] = df_daily['close'].shift(1)
            df_daily['change_pct'] = (df_daily['close'] - df_daily['pre_close']
                                      ) * 100 / df_daily['pre_close']

            # Keep only the gains: losses clamped to 0 via elementwise max.
            df_daily['up_pct'] = DataFrame({
                'up_pct': df_daily['change_pct'],
                'zero': 0
            }).max(axis=1)

            # RSI = 100 * SMA(gains, N) / SMA(|change|, N)
            df_daily['RSI'] = df_daily['up_pct'].rolling(N).mean() / abs(
                df_daily['change_pct']).rolling(N).mean() * 100
            df_daily['PREV_RSI'] = df_daily['RSI'].shift(1)

            df_daily.drop(['pre_close', 'change_pct', 'up_pct', 'close'],
                          axis=1,
                          inplace=True)

            # FIX: the first N-1 rows have no RSI yet (rolling window not
            # full); drop them so NaN values are not upserted into MongoDB.
            df_daily = df_daily[df_daily['RSI'].notna()]
            # df_daily['up'] = 80
            # df_daily['down'] = 20
            # df_daily.plot(kind='line', title='RSI', y=['RSI', 'up', 'down'])
            # plt.show()

            # Upsert the RSI values into MongoDB.
            update_requests = []
            for date in df_daily.index:
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {
                        '$set': {
                            'code': code,
                            'date': date,
                            'RSI': df_daily.loc[date]['RSI']
                        }
                    },
                              upsert=True))

            if len(update_requests) > 0:
                DB_CONN['RSI'].create_index([("code", 1), ("date", -1)],
                                            background=True)
                # Unordered write for speed and consistency with the other
                # indicator writers (was ordered=True).
                update_result = DB_CONN['RSI'].bulk_write(update_requests,
                                                          ordered=False)
                print('Save RSI data, code: %s, insert: %4d, update: %4d' %
                      (code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except Exception:
            print('ERROR happend %s' % code, flush=True)
def compute_macd(begin_date, end_date):
    """
    Detect MACD golden-cross and dead-cross signals in [begin_date, end_date]
    and persist them to the `macd` collection.
    :param begin_date: start date
    :param end_date: end date
    """
    # Standard MACD parameters -- tune here if needed:
    # 12-day fast EMA, 26-day slow EMA, 9-day smoothing for DEA.
    short = 12
    long = 26
    m = 9

    codes = get_all_codes()

    # Scan every stock for crosses.
    for code in codes:
        try:
            # Backward-adjusted prices so splits/dividends do not distort the EMAs.
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': begin_date,
                        '$lte': end_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': False
                })

            df_daily = DataFrame([daily for daily in daily_cursor])
            df_daily.set_index(['date'], drop=True, inplace=True)

            def recursive_ema(span):
                # EMA(i) = alpha * (close(i) - EMA(i-1)) + EMA(i-1),
                # alpha = 2 / (span + 1); seeded with the first close.
                series = []
                for trade_date in df_daily.index:
                    price = df_daily.loc[trade_date]['close']
                    if not series:
                        series.append(price)
                    else:
                        prev = series[-1]
                        series.append(2 / (span + 1) * (price - prev) + prev)
                return series

            # Fast and slow EMA columns.
            df_daily['EMA1'] = recursive_ema(short)
            df_daily['EMA2'] = recursive_ema(long)

            # DIFF = fast EMA - slow EMA
            df_daily['DIFF'] = df_daily['EMA1'] - df_daily['EMA2']

            # DEA = EMA(DIFF, m), same recursion seeded with the first DIFF.
            dea_values = []
            for trade_date in df_daily.index:
                diff_value = df_daily.loc[trade_date]['DIFF']
                if not dea_values:
                    dea_values.append(diff_value)
                else:
                    prev = dea_values[-1]
                    dea_values.append(2 / (m + 1) * (diff_value - prev) + prev)
            df_daily['DEA'] = dea_values

            # delta > 0 means DIFF is above DEA; pre_delta is yesterday's delta.
            df_daily['delta'] = df_daily['DIFF'] - df_daily['DEA']
            df_daily['pre_delta'] = df_daily['delta'].shift(1)

            # Golden cross: DIFF crosses above DEA (delta turns positive).
            df_daily_gold = df_daily[(df_daily['pre_delta'] <= 0)
                                     & (df_daily['delta'] > 0)]
            # Dead cross: DIFF crosses below DEA (delta turns negative).
            df_daily_dead = df_daily[(df_daily['pre_delta'] >= 0)
                                     & (df_daily['delta'] < 0)]

            # Upsert keyed by (code, date) -- index those fields in MongoDB.
            # Gold signals first, then dead, matching the write order callers
            # may rely on.
            update_requests = []
            for signal_name, frame in (('gold', df_daily_gold),
                                       ('dead', df_daily_dead)):
                for trade_date in frame.index:
                    update_requests.append(
                        UpdateOne(
                            {
                                'code': code,
                                'date': trade_date
                            },
                            {
                                '$set': {
                                    'code': code,
                                    'date': trade_date,
                                    'signal': signal_name
                                }
                            },
                            upsert=True))

            if len(update_requests) > 0:
                update_result = DB_CONN['macd'].bulk_write(update_requests,
                                                           ordered=False)
                print('Save MACD, 股票代码:%s, 插入:%4d, 更新:%4d' %
                      (code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except:
            print('错误发生: %s' % code, flush=True)
            traceback.print_exc()
def pe_computing(codes=None):
    '''
    Compute PE (close / eps from the latest annual report announced before the
    quote date) for each code and store it back into the daily collection.
    :param codes: a single code, a list of codes, or None for all codes
    '''
    if codes is None:
        codes = get_all_codes()
    # Allow passing a single code without wrapping it in a list.
    if isinstance(codes, list) is False:
        codes = [codes]

    for code in codes:
        # Close prices from the daily collection.
        daily_cursor = daily_collection.find({'code': code},
                                             projection={
                                                 'close': True,
                                                 'date': True,
                                                 '_id': False
                                             })
        update_requests = []
        for daily in daily_cursor:
            date = daily['date']
            # Latest annual report (report_date = XXXX-12-31) announced
            # strictly before the quote date.
            # FIX: raw string for the regex -- '\d' in a plain string is an
            # invalid escape sequence (same runtime value, no warning).
            finance_eps_cursor = finance_report_collection.find_one(
                {
                    'code': code,
                    'report_date': {
                        '$regex': r'\d{4}-12-31'
                    },
                    'announced_date': {
                        '$lt': date
                    }
                },
                projection={
                    'code': True,
                    'eps': True,
                    "_id": False
                },
                sort=[('announced_date', DESCENDING)])
            if finance_eps_cursor is None:
                continue
            # Before 2008 the finance_report data may be incomplete: trigger a
            # re-crawl for this code and move on to the next code.
            if date < '2008-01-01':
                print('have no date in finance_reprot, code: %s' % code)
                finance_xiaoxiang().crawl_finance_report(code)
                break

            # '-' means EPS was not disclosed; treat as unavailable.
            eps = 0
            if finance_eps_cursor['eps'] != '-':
                eps = finance_eps_cursor['eps']

            # Skip zero/missing EPS to avoid division by zero.
            if eps != 0:
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {'$set': {
                        'pe': round(daily['close'] / eps, 4)
                    }}))

        # Bulk-write the PE values back into MongoDB.
        if len(update_requests) > 0:
            update_result = daily_collection.bulk_write(update_requests,
                                                        ordered=False)
            print('update pe, code: %s, insert: %s, update: %s' %
                  (code, update_result.upserted_count,
                   update_result.modified_count))
def get_next_trade_day(date):
    """
    Return the next trading day after `date` (format '%Y-%m-%d').

    Returns -1 when `date` is the last known trading day.
    NOTE(review): when `date` is not a trading day this falls through and
    implicitly returns None -- confirm callers handle that case.
    """
    tradingdays_list = get_trade_days()
    if date in tradingdays_list:
        today_index = tradingdays_list.index(date)
        if (today_index == len(tradingdays_list) - 1):
            return -1
        return tradingdays_list[int(today_index) + 1]


def get_tushare_code(begin_date='2000-01-01', end_date=None):
    """
    Collect the set of tushare codes that traded at least once in
    [begin_date, end_date] and return them as a sorted list.
    :param begin_date: start date, defaults to '2000-01-01'
    :param end_date: end date, defaults to today
    """
    if (end_date is None):
        end_date = datetime.now().strftime('%Y-%m-%d')

    # Initialise the tushare pro API.
    # NOTE(review): hard-coded API token -- consider moving it to config/env.
    pro = ts.pro_api(
        'f3ef4ac4dc04104e0573aa75c29aef70f30837a416baf6cd1a0f8e81')
    tradingdays_list = get_trading_dates(begin_date=begin_date,
                                         end_date=end_date)
    # tradingdays_list = get_trade_days(begin_date= begin_date,end_date = end_date)
    codes = set()
    for day in tradingdays_list:
        # tushare expects dates as YYYYMMDD.
        data = pro.daily(trade_date=day.replace('-', ''))
        codes = codes | set(data.ts_code)
    # BUG FIX: the original `return list(codes).sort()` always returned None,
    # because list.sort() sorts in place and returns None.
    return sorted(codes)


if __name__ == '__main__':
    # print(get_tushare_code(begin_date = '2015-01-01',end_date = None))
    print(len(get_all_codes()))
def compute_boll(begin_date=None, end_date=None, codes=None):
    """
    Compute Bollinger-band breakout signals in the given period and upsert
    them into the `boll` collection.
    :param begin_date: start date, defaults to '2008-01-01'
    :param end_date: end date, defaults to today
    :param codes: a single code, a list of codes, or None for all codes
    """
    if codes is None:
        codes = get_all_codes()
    if begin_date is None:
        begin_date = '2008-01-01'
    if end_date is None:
        end_date = datetime.now().strftime('%Y-%m-%d')
    # Allow passing a single code without wrapping it in a list.
    if isinstance(codes, list) is False:
        codes = [codes]

    for code in codes:
        try:
            # Backward-adjusted prices for the Bollinger bands.
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': begin_date,
                        '$lte': end_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': False
                }).hint([('code', 1), ('date', -1)])

            df_daily = DataFrame([daily for daily in daily_cursor])

            # Middle band: 20-day SMA of close.
            df_daily['MB'] = df_daily['close'].rolling(20).mean()
            # 20-day rolling standard deviation.
            df_daily['std'] = df_daily['close'].rolling(20).std()
            # Upper band = MB + 2 * std
            df_daily['UP'] = df_daily['MB'] + 2 * df_daily['std']
            # Lower band = MB - 2 * std
            df_daily['DOWN'] = df_daily['MB'] - 2 * df_daily['std']

            # Use date as the index.
            df_daily.set_index(['date'], inplace=True)

            # Previous close aligned onto the current row.
            last_close = df_daily['close'].shift(1)

            # Upward breakout: yesterday's close at/below yesterday's upper
            # band, today's close above it.
            shifted_up = df_daily['UP'].shift(1)
            df_daily['up_mask'] = (last_close <= shifted_up) & (
                df_daily['close'] > shifted_up)

            # Downward breakout: yesterday's close at/above yesterday's lower
            # band, today's close below it.
            shifted_down = df_daily['DOWN'].shift(1)
            df_daily['down_mask'] = (last_close >= shifted_down) & (
                df_daily['close'] < shifted_down)

            # Keep only rows that fired either signal.
            df_daily = df_daily[df_daily['up_mask'] | df_daily['down_mask']]
            # FIX: keyword axis=1 -- the positional axis argument was removed
            # in pandas 2.0.
            df_daily.drop(['close', 'std', 'MB', 'UP', 'DOWN'], axis=1,
                          inplace=True)

            # Upsert the signals, keyed by the whole doc.
            update_requests = []
            for index in df_daily.index:
                doc = {
                    'code': code,
                    'date': index,
                    # direction: 'up' = upward breakout, 'down' = downward
                    'direction': 'up' if df_daily.loc[index]['up_mask'] else 'down'
                }
                update_requests.append(
                    UpdateOne(doc, {'$set': doc}, upsert=True))

            if len(update_requests) > 0:
                DB_CONN['boll'].create_index([("code", 1), ("date", -1)],
                                             background=True)
                update_result = DB_CONN['boll'].bulk_write(update_requests,
                                                           ordered=False)
                print('%s, upserted: %4d, modified: %4d' %
                      (code,
                       update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except Exception:
            traceback.print_exc()
def compute_fractal(begin_date, end_date):
    """
    Detect fractal signals (5-bar local highs/lows) and upsert them into the
    `fractal_signal` collection.

    Alignment note: the comparisons use shift(1..4), so the fractal's middle
    bar is the one two trading days BEFORE the row where the condition fires;
    the stored `date` is therefore two days after the middle bar -- presumably
    the confirmation day, to avoid lookahead.  TODO(review): confirm with the
    strategy that consumes these signals.
    :param begin_date: start date
    :param end_date: end date
    """
    codes = get_all_codes()

    for code in codes:
        try:
            # Backward-adjusted prices, so corporate actions do not distort
            # highs/lows.
            daily_cursor = DB_CONN['daily_hfq'].find(
                {'code': code, 'date': {'$gte': begin_date, '$lte': end_date}, 'index': False},
                sort=[('date', ASCENDING)],
                projection={'date': True, 'high': True, 'low': True, '_id': False}
            )
            df_daily = DataFrame([daily for daily in daily_cursor])

            # Use date as the index.  NOTE(review): the positional `1` is
            # set_index's `drop` argument; removed in pandas 2.0.
            df_daily.set_index(['date'], 1, inplace=True)

            # shift(k) aligns the bar k rows back with the current row, so at
            # row t: shift_2 holds bar t-2 (the candidate middle bar), shift_1
            # holds t-1, shift_3/4 hold t-3/t-4, and df_daily itself is bar t.
            df_daily_shift_1 = df_daily.shift(1)
            df_daily_shift_2 = df_daily.shift(2)
            df_daily_shift_3 = df_daily.shift(3)
            df_daily_shift_4 = df_daily.shift(4)

            # Top fractal: the middle bar's high exceeds the highs of the two
            # bars before it and the two bars after it.
            df_daily['up'] = (df_daily_shift_2['high'] > df_daily['high']) & \
                             (df_daily_shift_2['high'] > df_daily_shift_1['high']) & \
                             (df_daily_shift_2['high'] > df_daily_shift_3['high']) & \
                             (df_daily_shift_2['high'] > df_daily_shift_4['high'])

            # Bottom fractal: the middle bar's low is below the lows of the two
            # bars before it and the two bars after it.
            df_daily['down'] = (df_daily_shift_2['low'] < df_daily['low']) & \
                               (df_daily_shift_2['low'] < df_daily_shift_1['low']) & \
                               (df_daily_shift_2['low'] < df_daily_shift_3['low']) & \
                               (df_daily_shift_2['low'] < df_daily_shift_4['low'])

            # Keep only the dates that fired a top or bottom fractal.
            df_daily = df_daily[(df_daily['up'] | df_daily['down'])]

            # Drop the price columns before persisting.
            df_daily.drop(['high', 'low'], 1, inplace=True)
            print(df_daily)

            # Persist the signals.
            update_requests = []
            # Stored fields: code, date and the signal direction.
            for date in df_daily.index:
                doc = {
                    'code': code,
                    'date': date,
                    # up: top fractal, down: bottom fractal
                    'direction': 'up' if df_daily.loc[date]['up'] else 'down'
                }
                # Upsert keyed by (code, date, direction); index those fields:
                # db.fractal_signal.createIndex({'code': 1, 'date': 1, 'direction': 1})
                update_requests.append(
                    UpdateOne(doc, {'$set': doc}, upsert=True))

            if len(update_requests) > 0:
                update_result = DB_CONN['fractal_signal'].bulk_write(update_requests, ordered=False)
                print('%s, upserted: %4d, modified: %4d' %
                      (code, update_result.upserted_count, update_result.modified_count),
                      flush=True)
        except:
            print('错误发生: %s' % code,
                  flush=True)
            traceback.print_exc()
def fill_au_factor_pre_close(begin_date=None, end_date=None, codes=None):
    """
    Fill the `daily_none` collection with:
    1. the adjustment factor au_factor, computed as au_factor = hfq_close / close
    2. pre_close = close(-1) * au_factor(-1) / au_factor

    :param begin_date: start date ('YYYY-MM-DD'); defaults to '2008-01-01'
    :param end_date: end date ('YYYY-MM-DD'); defaults to today
    :param codes: a single code, a list of codes, or None for all codes
    """
    if codes is None:
        all_codes = get_all_codes()
    else:
        if isinstance(codes, list) is False:
            codes = [codes]
        all_codes = codes

    if begin_date is None:
        begin_date = '2008-01-01'
    if end_date is None:
        end_date = datetime.now().strftime('%Y-%m-%d')

    for code in all_codes:
        # hfq close per date — the numerator of the adjustment factor.
        # FIX: the original chained .hint(...) twice with the same index
        # spec; the duplicate call was redundant and has been removed.
        hfq_daily_cursor = DB_CONN['daily_hfq'].find(
            {'code': code, 'date': {'$lte': end_date, '$gte': begin_date}, 'index': False},
            sort=[('date', ASCENDING)],
            projection={'date': True, 'close': True}).hint([('code', 1), ('date', -1)])
        date_hfq_close_dict = dict([(x['date'], x['close']) for x in hfq_daily_cursor])

        daily_cursor = DB_CONN['daily_none'].find(
            {'code': code, 'date': {'$lte': end_date, '$gte': begin_date}, 'index': False},
            sort=[('date', ASCENDING)],
            projection={'date': True, 'close': True}
        ).hint([('code', 1), ('date', -1)])

        # Sentinel -1 means "no valid previous trading day yet".
        last_close = -1
        last_au_factor = -1

        update_requests = []
        for daily in daily_cursor:
            date = daily['date']
            try:
                close = daily['close']
                doc = dict()
                # adjustment factor = same-day hfq close / raw close
                au_factor = round(date_hfq_close_dict[date] / close, 2)
                doc['au_factor'] = au_factor

                # pre_close = previous raw close * previous factor / current factor
                if last_close != -1 and last_au_factor != -1:
                    pre_close = last_close * last_au_factor / au_factor
                    doc['pre_close'] = round(pre_close, 2)

                last_au_factor = au_factor
                last_close = close
                update_requests.append(
                    UpdateOne(
                        {'code': code, 'date': date, 'index': False},
                        {'$set': doc}))
            except Exception:
                # FIX: narrowed from a bare except.
                print('ERROR happen when calculate au_factor, code: %s,date: %s' % (code, date), flush=True)
                # Reset the sentinels so a bad day does not poison the next one.
                last_close = -1
                last_au_factor = -1

        if len(update_requests) > 0:
            update_result = DB_CONN['daily_none'].bulk_write(update_requests, ordered=False)
            print('fill au_factor and pre_close, code: %s, update: %4d, insert: %s' %
                  (code, update_result.modified_count, update_result.upserted_count),
                  flush=True)
def crawl_finance_report():
    """
    Crawl quarterly finance reports for every stock from eastmoney's data
    center API and upsert them into the `finance_report` collection.

    Upserts are keyed on (code, report_date); create a supporting index:
    db.finance_report.createIndex({'code': 1, 'report_date': 1})
    """
    # All stock codes to crawl (the argument selects the code universe —
    # presumably a market/type filter; confirm against get_all_codes).
    codes = get_all_codes(2)

    # One connection pool reused for all requests.
    conn_pool = urllib3.PoolManager()

    # Report API; {0} is the stock code, {page}/{pagesize} control paging.
    url = 'http://datacenter.eastmoney.com/api/data/get?callback=jQuery11230813372504046614_' \
          '1613984932139&st=REPORTDATE&sr=-1&ps={pagesize}&p={page}&sty=ALL&filter=(SECURITY_CODE%3D%22{0}%22)' \
          '&token=894050c76af8597a853f5b408b759f5d&type=RPT_LICO_FN_CPD'
    pagesize = '1000'  # large enough that all reports fit on one page
    page = '1'
    url = url.replace('{pagesize}', pagesize)
    url = url.replace('{page}', page)

    for code in codes:
        # Fetch this stock's reports.
        response = conn_pool.request('GET', url.replace('{0}', code))

        # The response is JSONP; strip the callback wrapper before parsing.
        raw_buff = response.data.decode('UTF-8')
        buff = raw_buff[raw_buff.find('{'):raw_buff.rfind('}') + 1]
        result = json.loads(buff)

        if result is None:
            print('reports is None = ', code)
            continue

        # FIX: the original validated result/result.data with a convoluted
        # nested if/else that duplicated the error branch; flattened via .get().
        if result.get('result') is None or result['result'].get('data') is None:
            print('result or data not in reports = ', code, '\n', result)
            continue
        reports = result['result']['data']

        update_requests = []
        for report in reports:
            # Skip malformed rows: both dates must be at least 'YYYY-MM-DD'.
            if report['REPORTDATE'] is None or len(report['REPORTDATE']) < 10:
                print('REPORTDATE ERROR ', report)
                continue
            if report['UPDATE_DATE'] is None or len(report['UPDATE_DATE']) < 10:
                print('UPDATE_DATE ERROR ', report)
                continue

            doc = {
                'code': code,
                # REPORTDATE: report period end, keep only YYYY-MM-DD
                'report_date': report['REPORTDATE'][:10],
                # UPDATE_DATE: latest announcement date
                'announced_date': report['UPDATE_DATE'][:10],
                # basic earnings per share (yuan)
                'eps': report['BASIC_EPS'],
                # EPS excluding non-recurring items (yuan)
                'DEDUCT_BASIC_EPS': report['DEDUCT_BASIC_EPS'],
                # operating revenue
                'TOTAL_OPERATE_INCOME': report['TOTAL_OPERATE_INCOME'],
                # net profit attributable to parent
                'PARENT_NETPROFIT': report['PARENT_NETPROFIT'],
                # YSTZ: revenue YoY growth (%)
                'income_ratio': report['YSTZ'],
                # SJLTZ: net profit YoY growth (%)
                'netprofit_ratio': report['SJLTZ'],
                # BPS: net assets per share (yuan)
                'BPS': report['BPS'],
                # MGJYXJJE: operating cash flow per share (yuan)
                'operating_per_share': report['MGJYXJJE'],
                # YSHZ: revenue QoQ growth (%)
                'income_quart_ratio': report['YSHZ'],
                # SJLHZ: net profit QoQ growth (%)
                'netprofit_quart_ratio': report['SJLHZ'],
                # PUBLISHNAME: presumably the industry classification — TODO confirm
                'publish_name': report['PUBLISHNAME'],
                # NOTICE_DATE: first announcement date
                'notice_date': report['NOTICE_DATE'],
                # QDATE: quarter label, e.g. 2017Q3
                'quart_date': report['QDATE'],
                # SECUCODE: exchange-qualified code, e.g. 600000.SH
                'code1': report['SECUCODE'],
                # SECURITY_NAME_ABBR: stock short name
                'name': report['SECURITY_NAME_ABBR'],
                # TRADE_MARKET: exchange board
                'trade_market': report['TRADE_MARKET'],
                # SECURITY_TYPE: security class, e.g. A-share
                'security_type': report['SECURITY_TYPE'],
                # ASSIGNDSCRPT: profit distribution, e.g. "10派1.80元(含税)"
                'dividend': report['ASSIGNDSCRPT']
            }

            # Upsert keyed on code + report_date; upsert=True inserts rows
            # that do not exist yet.
            update_requests.append(
                UpdateOne(
                    {
                        'code': code,
                        'report_date': doc['report_date']
                    },
                    {'$set': doc},
                    upsert=True))

        if len(update_requests) > 0:
            # Bulk, unordered write for speed.
            update_result = DB_CONN['finance_report'].bulk_write(
                update_requests, ordered=False)
            print('股票 %s, 财报,更新 %d, 插入 %d' %
                  (code, update_result.modified_count,
                   update_result.upserted_count))
def crawl_finance_report(self):
    """
    Crawl the three financial statements (balance sheet, cash-flow statement,
    income statement) for every stock from eastmoney and upsert each report
    into its own collection (named after the report type).
    """
    codes = get_all_codes()

    # One connection pool reused for all requests.
    conn_pool = urllib3.PoolManager()

    # Two placeholders: {1} - report type, {2} - stock code.
    url = 'http://dcfm.eastmoney.com/em_mutisvcexpandinterface/api/js/get?' \
          'type={1}&token=70f12f2f4f091e459a279469fe49eca5&' \
          'st=reportdate&sr=-1&p=1&ps=500&filter=(scode=%27{2}%27)' \
          '&js={%22pages%22:(tp),%22data%22:%20(x)}&rt=51044775#'

    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'

    # Report types: balance sheet, cash-flow statement, income statement.
    report_types = ['CWBB_ZCFZB', 'CWBB_XJLLB', 'CWBB_LRB']

    for code in codes:
        for report_type in report_types:
            print('开始抓取财报数据,股票:%s,财报类型:%s' % (code, report_type), flush=True)
            response = conn_pool.request(
                'GET',
                url.replace('{1}', report_type).replace('{2}', code),
                headers={'User-Agent': user_agent})

            # FIX: the payload is already decoded to str, so the positional
            # 'UTF-8' encoding argument to loads() was useless and has been
            # removed (the encoding parameter is deprecated/removed in modern
            # JSON parsers and raised a TypeError on newer versions).
            result = simplejson.loads(response.data.decode('UTF-8'))
            reports = result['data']

            update_requests = []
            for report in reports:
                try:
                    report.update({
                        # keep only YYYY-MM-DD of the two dates
                        'announced_date': report['noticedate'][0:10],
                        'report_date': report['reportdate'][0:10],
                        # align field names with the rest of the system
                        'code': code,
                        'name': report['sname']
                    })
                    update_requests.append(
                        UpdateOne(
                            {
                                'code': code,
                                'report_date': report['report_date'],
                                'announced_date': report['announced_date']
                            },
                            {'$set': report},
                            upsert=True))
                except Exception:
                    # FIX: narrowed from a bare except so KeyboardInterrupt
                    # etc. still propagate; a bad row is logged and skipped.
                    print('解析出错,股票:%s 财报类型:%s' % (code, report_type))

            if len(update_requests) > 0:
                update_result = self.db[report_type].bulk_write(
                    update_requests, ordered=False)
                print('股票 %s, 财报类型:%s,更新: %4d, 新增: %4d' %
                      (code, report_type,
                       update_result.modified_count,
                       update_result.upserted_count))
def compute_rsi(begin_date, end_date):
    """
    Compute RSI cross signals in [begin_date, end_date] and save them to the
    `rsi` collection.

    RSI(N) = SMA(up moves, N) / SMA(|moves|, N) * 100, computed on hfq
    (post-adjusted) prices.  Signals:
      - over_bought: RSI crosses down through 80 (sell signal)
      - over_sold:   RSI crosses up through 20 (buy signal)

    :param begin_date: start date (inclusive, 'YYYY-MM-DD')
    :param end_date: end date (inclusive, 'YYYY-MM-DD')
    """
    codes = get_all_codes()

    # RSI look-back window.
    N = 12

    for code in codes:
        try:
            # hfq prices avoid artificial jumps from splits/dividends.
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {'$gte': begin_date, '$lte': end_date},
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={'date': True, 'close': True, '_id': False})
            df_daily = DataFrame([daily for daily in daily_cursor])

            # Not enough rows to fill even one N-day window.
            if df_daily.index.size < N:
                print('数据量不够: %s' % code, flush=True)
                continue

            # FIX: keyword arguments — the positional drop/axis forms were
            # deprecated and removed in pandas 2.0.
            df_daily.set_index('date', inplace=True)

            # Previous close via shift(1).
            df_daily['pre_close'] = df_daily['close'].shift(1)

            # Daily change in percent: (close - pre_close) * 100 / pre_close
            df_daily['change_pct'] = (df_daily['close'] - df_daily['pre_close']
                                      ) * 100 / df_daily['pre_close']

            # Keep only the positive moves; down days contribute 0.
            df_daily['up_pct'] = DataFrame({
                'up_pct': df_daily['change_pct'],
                'zero': 0
            }).max(axis=1)

            # RSI = mean gain / mean absolute move * 100 over the window.
            df_daily['RSI'] = df_daily['up_pct'].rolling(N).mean() / abs(
                df_daily['change_pct']).rolling(N).mean() * 100

            # Previous day's RSI, to detect threshold crossings.
            df_daily['PREV_RSI'] = df_daily['RSI'].shift(1)

            # Over-bought: RSI crosses down through 80 -> sell signal.
            df_daily_over_bought = df_daily[(df_daily['RSI'] < 80)
                                            & (df_daily['PREV_RSI'] >= 80)]
            # Over-sold: RSI crosses up through 20 -> buy signal.
            df_daily_over_sold = df_daily[(df_daily['RSI'] > 20)
                                          & (df_daily['PREV_RSI'] <= 20)]

            # Upserts keyed on code+date; create a supporting index:
            # db.rsi.createIndex({'code': 1, 'date': 1})
            update_requests = []

            for date in df_daily_over_bought.index:
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {
                        '$set': {
                            'code': code,
                            'date': date,
                            'signal': 'over_bought'
                        }
                    }, upsert=True))

            for date in df_daily_over_sold.index:
                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {
                        '$set': {
                            'code': code,
                            'date': date,
                            'signal': 'over_sold'
                        }
                    }, upsert=True))

            if len(update_requests) > 0:
                update_result = DB_CONN['rsi'].bulk_write(update_requests, ordered=False)
                print('Save RSI, 股票代码:%s, 插入:%4d, 更新:%4d' %
                      (code, update_result.upserted_count, update_result.modified_count),
                      flush=True)
        except Exception:
            # FIX: narrowed from a bare except; one bad stock must not abort
            # the whole run.
            print('错误发生: %s' % code, flush=True)
def compute_fractal(begin_date=None, end_date=None, codes=None):
    """
    Compute fractal (top/bottom) signals in [begin_date, end_date] and upsert
    them into the `fractal_signal` collection.

    A day is a top fractal ('up') when its high exceeds the highs of the two
    days before AND the two days after it; a bottom fractal ('down') is the
    mirror condition on lows.  NOTE: shift(-1)/shift(-2) look at *future*
    days, so a signal dated t is only knowable two trading days later.

    :param begin_date: start date; defaults to '2008-01-01'
    :param end_date: end date; defaults to today
    :param codes: one code, a list of codes, or None for all codes
    """
    if codes is None:
        codes = get_all_codes()
    if begin_date is None:
        begin_date = '2008-01-01'
    if end_date is None:
        end_date = datetime.now().strftime('%Y-%m-%d')
    if isinstance(codes, list) is False:
        codes = [codes]

    # FIX: hoisted out of the per-code loop — the original re-issued
    # create_index for every stock that produced signals.
    DB_CONN['fractal_signal'].create_index([("code", 1), ("date", -1)],
                                           background=True)

    for code in codes:
        try:
            # hfq prices so splits/dividends don't create spurious patterns.
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {'$gte': begin_date, '$lte': end_date},
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={'date': True, 'high': True, 'low': True, '_id': False}
            ).hint([('code', 1), ('date', -1)])
            df_daily = DataFrame([daily for daily in daily_cursor])

            # Guard: set_index on an empty frame raises KeyError.
            if df_daily.empty:
                continue

            # FIX: keyword arguments — the positional drop/axis forms were
            # deprecated and removed in pandas 2.0.
            df_daily.set_index('date', inplace=True)

            # Align the two future days (left shifts) and the two past days
            # (right shifts) with each row.
            df_daily_left_shift_1 = df_daily.shift(-1)
            df_daily_left_shift_2 = df_daily.shift(-2)
            df_daily_right_shift_1 = df_daily.shift(1)
            df_daily_right_shift_2 = df_daily.shift(2)

            # Top fractal: high above the two previous and two following highs.
            df_daily['up'] = (df_daily['high'] > df_daily_left_shift_1['high']) & \
                             (df_daily['high'] > df_daily_left_shift_2['high']) & \
                             (df_daily['high'] > df_daily_right_shift_1['high']) & \
                             (df_daily['high'] > df_daily_right_shift_2['high'])
            # Bottom fractal: low below the two previous and two following lows.
            df_daily['down'] = (df_daily['low'] < df_daily_left_shift_1['low']) & \
                               (df_daily['low'] < df_daily_left_shift_2['low']) & \
                               (df_daily['low'] < df_daily_right_shift_1['low']) & \
                               (df_daily['low'] < df_daily_right_shift_2['low'])

            # Keep only the dates where either signal fired.
            df_daily = df_daily[(df_daily['up'] | df_daily['down'])]

            # Drop columns that are no longer needed before saving.
            df_daily.drop(['high', 'low'], axis=1, inplace=True)
            print(df_daily)

            # Persist the signals.
            update_requests = []
            for index in df_daily.index:
                doc = {
                    'code': code,
                    'date': index,
                    # direction: 'up' for a top fractal, 'down' for a bottom
                    'direction': 'up' if df_daily.loc[index]['up'] else 'down'
                }
                update_requests.append(
                    UpdateOne(doc, {'$set': doc}, upsert=True))

            if len(update_requests) > 0:
                update_result = DB_CONN['fractal_signal'].bulk_write(
                    update_requests, ordered=False)
                print('%s, upserted: %4d, modified: %4d' %
                      (code, update_result.upserted_count, update_result.modified_count),
                      flush=True)
        except Exception:
            # FIX: narrowed from a bare except.
            print('错误发生: %s' % code, flush=True)
            traceback.print_exc()