def rebuild_dist():
    """
    Rebuild the shareholding distribution tables from cached CSV files.

    Deletes all rows from level01..level17, VACUUMs the database, then
    re-imports every cached dist-YYYYMMDD.csv.xz file in date order.
    """
    # Clear existing data
    db_conn = db.get_connection()
    for level in range(1, 18):
        sql = 'DELETE FROM level%02d;' % level
        db_conn.execute(sql)
    db_conn.commit()
    db_conn.execute('VACUUM')  # cannot VACUUM from within a transaction
    db_conn.close()

    # Collect the dates that can be rebuilt
    csv_dir = common.get_cache_dir('tdcc')
    date_list = []
    for filename in os.listdir(csv_dir):
        # Bug fix: dots escaped and pattern anchored, so stray files such
        # as "dist-20190101Xcsv_xz~" no longer match.
        match = re.match(r'dist-(\d{8})\.csv\.xz$', filename)
        if match is not None:
            date_list.append(match.group(1))

    # Rebuild in chronological order
    date_list.sort()
    for csv_date in date_list:
        import_dist(csv_date)
def sync_short_borrowed(trading_date):
    """
    Borrowable-for-short-sale balances (可借券賣出).

    - The TWSE endpoint only serves today's data.
    - TODO: confirm the time at which the served data rolls over.
    """
    dsitem = 'short_borrowed'
    logger = common.get_logger('finance')
    datestr = trading_date.replace('-', '')

    if not has_cache(dsitem, datestr, 'csv'):
        session = common.get_session(False)
        url = 'http://www.twse.com.tw/SBL/TWT96U?response=csv'
        resp = session.get(url)
        ds = resp.text
        # The first CSV line carries the data date in ROC-era format
        line1 = ds[:ds.find('\r\n')]
        match = re.search(r'(\d{3})年(\d{2})月(\d{2})日', line1)
        if match is None:
            logger.error('可借券賣出的 CSV 無法取得日期字串')
            return
        yy = int(match.group(1)) + 1911  # ROC year -> Gregorian year
        mm = match.group(2)
        dd = match.group(3)
        dsdate = '%04d%s%s' % (yy, mm, dd)
        if dsdate != datestr:
            logger.error('可借券賣出的資料日期與指定日期不同, 資料日期 %s, 指定日期 %s', dsdate, datestr)
            return
        # Bug fix: writing the cache is a normal event, not an error
        logger.info('可借券賣出的資料寫入快取: %s', datestr)
        save_cache(dsitem, datestr, ds, 'csv')

    db_conn = db.get_connection()
    sql = '''
    INSERT INTO `short_sell` (
        trading_date, security_id, borrowed
    ) VALUES (?,?,?)
    '''

    logger.info('載入 %s 的可借券賣出', datestr)
    csv_path = get_cache_path(dsitem, datestr, 'csv')
    # Each CSV row holds two securities side by side; the trailing column
    # is layout padding and never read.
    col_names = ['sec1', 'vol1', 'sec2', 'vol2', 'pad']
    df = pandas.read_csv(csv_path, sep=',', skiprows=3,
                         header=None, names=col_names)
    cnt = 0
    for index, row in df.iterrows():
        security_id = row['sec1'].strip('="')
        borrowed = int(row['vol1'].replace(',', ''))
        db_conn.execute(sql, (trading_date, security_id, borrowed))
        cnt += 1
        security_id = row['sec2'].strip('="')
        if security_id != '_':  # '_' marks an empty right-hand cell
            borrowed = int(row['vol2'].replace(',', ''))
            db_conn.execute(sql, (trading_date, security_id, borrowed))
            cnt += 1
    db_conn.commit()
    db_conn.close()
def sync_dataset(dsitem, trading_date):
    """
    Shared workflow for synchronizing a dataset.

    * HTTP date format: 108/05/29 (ROC era)
    * DB / cache date format: 2019-05-29

    NOTE(review): a second function with the same name appears later in
    this module and shadows this one at import time — confirm which
    version is meant to survive.
    """
    logger = common.get_logger('finance')
    dtm = re.match(r'(\d{4})-(\d{2})-(\d{2})', trading_date)
    tokens = [str(int(dtm.group(1)) - 1911), dtm.group(2), dtm.group(3)]
    datestr = '/'.join(tokens)
    fmt = 'json'  # renamed from "format" to avoid shadowing the builtin
    this_mod = sys.modules[__name__]

    if has_cache(dsitem, trading_date, fmt):
        # Load the cached dataset
        logger.info('套用 %s 的 %s 快取', trading_date, dsitem)
        dataset = load_cache(dsitem, trading_date, fmt)
    else:
        # Download the dataset with a bounded retry loop
        dataset = None
        repeat = 0
        hookfunc = getattr(this_mod, 'download_' + dsitem)
        while dataset is None and repeat < REPEAT_LIMIT:
            repeat += 1
            if repeat > 1:
                time.sleep(REPEAT_INTERVAL)
            try:
                logger.info('下載 %s 的 %s', trading_date, dsitem)
                dataset = hookfunc(datestr)
                logger.info('儲存 %s 的 %s', trading_date, dsitem)
                save_cache(dsitem, trading_date, dataset, fmt)
            except Exception as ex:
                # Bug fix: ex.reason only exists on urllib-style errors;
                # on any other exception it raised AttributeError here.
                logger.error('無法取得 %s 的 %s (重試: %d, %s)',
                             trading_date, dsitem, repeat, ex)

    if dataset is None:
        return

    # Import into the database
    dbcon = db.get_connection()
    hookfunc = getattr(this_mod, 'import_' + dsitem)
    try:
        hookfunc(dbcon, trading_date, dataset)
        logger.info('匯入 %s 的 %s', trading_date, dsitem)
    except sqlite3.IntegrityError:
        logger.warning('已經匯入過 %s 的 %s', trading_date, dsitem)
    except Exception as ex:
        # TODO: ex.args[0] reliability is unconfirmed — verify
        logger.error('無法匯入 %s 的 %s (%s)', trading_date, dsitem, ex.args[0])
    dbcon.commit()
    dbcon.close()
def import_dist(csv_date='latest'):
    """
    Import the shareholding distribution CSV of one date into the DB.

    Args:
        csv_date: 'YYYYMMDD' date string, or 'latest' to pick the most
            recent cached file.
    """
    logger = common.get_logger('finance')

    # Consistency: use the same cache-dir helper as rebuild_dist() instead
    # of a hard-coded ~/.twnews/cache/tdcc path.
    cache_dir = common.get_cache_dir('tdcc')

    if csv_date == 'latest':
        max_date = ''
        for filename in os.listdir(cache_dir):
            # Bug fix: dot escaped. Deliberately left unanchored so the
            # compressed 'dist-YYYYMMDD.csv.xz' files still match.
            match = re.match(r'dist-(\d{8})\.csv', filename)
            if match is not None and max_date < match.group(1):
                max_date = match.group(1)
        csv_date = max_date

    # YYYYMMDD -> YYYY-MM-DD
    iso_date = re.sub(r'(\d{4})(\d{2})(\d{2})', r'\1-\2-\3', csv_date)
    csv_path = os.path.join(cache_dir, 'dist-%s.csv' % csv_date)
    if not os.path.isfile(csv_path):
        logger.error('沒有這個日期的股權分散表檔案: %s', csv_path)
        return

    db_conn = db.get_connection()
    col_names = [
        'trading_date', 'security_id', 'level',
        'numof_holders', 'numof_stocks', 'percentof_stocks'
    ]
    df = pandas.read_csv(csv_path, skiprows=1, header=None, names=col_names)

    # One table per ownership level; the row's 'level' selects the table
    sql_template = '''
    INSERT INTO level%02d (
        trading_date, security_id,
        numof_holders, numof_stocks, percentof_stocks
    ) VALUES (?,?,?,?,?);
    '''
    for index, row in df.iterrows():
        sql = sql_template % row['level']
        db_conn.execute(sql, (
            iso_date,
            row['security_id'],
            row['numof_holders'],
            row['numof_stocks'],
            row['percentof_stocks']
        ))
        # Progress logging every 5000 rows (lazy args, no eager %-format)
        if index > 0 and index % 5000 == 0:
            logger.debug('已儲存 %s 的 %d 筆股權分散資料', iso_date, index)

    db_conn.commit()
    db_conn.close()
def sync_margin_trading(trading_date):
    """
    Synchronize margin trading (融資融券) balances for one trading day.
    """
    dsitem = 'margin_trading'
    logger = common.get_logger('finance')
    datestr = trading_date.replace('-', '')

    # Cache handling
    if has_cache(dsitem, datestr):
        logger.info('載入 %s 的融資融券', datestr)
        ds = load_cache(dsitem, datestr)
    else:
        logger.info('沒有 %s 的融資融券', datestr)
        session = common.get_session(False)
        url = 'http://www.twse.com.tw/exchangeReport/MI_MARGN?response=json&date=%s&selectType=ALL' % datestr
        resp = session.get(url)
        ds = resp.json()
        status = ds['stat']
        # NOTE: the endpoint answers HTTP 200 even on failure; the JSON
        # 'stat' field is the only success indicator.
        #   success: OK
        #   failure: 查詢日期大於可查詢最大日期,請重新查詢!
        #            很抱歉,目前線上人數過多,請您稍候再試
        if status != 'OK':
            logger.error('無法取得 %s 的融資融券資料, 原因: %s', datestr, status)
            return
        if len(ds['data']) == 0:
            logger.error('沒有 %s 的融資融券資料, 可能尚未結算或是非交易日', datestr)
            return
        logger.info('儲存 %s 的融資融券', datestr)
        save_cache(dsitem, datestr, ds)

    db_conn = db.get_connection()
    sql = '''
    INSERT INTO `margin` (
        trading_date, security_id, security_name,
        buying_balance, selling_balance
    ) VALUES (?,?,?,?,?)
    '''
    for detail in ds['data']:
        security_id = detail[0]
        security_name = detail[1].strip()
        buying_balance = int(detail[6].replace(',', ''))
        selling_balance = int(detail[12].replace(',', ''))
        db_conn.execute(sql, (trading_date, security_id, security_name,
                              buying_balance, selling_balance))
        # Consistency fix: lazy %-args like the other sync_* functions, so
        # the message is only built when DEBUG logging is enabled.
        logger.debug('[%s %s] 融資餘額: %s, 融券餘額: %s',
                     security_id, security_name,
                     buying_balance, selling_balance)
    db_conn.commit()
    db_conn.close()
def sync_institution_trading(trading_date):
    """
    Synchronize institutional investors' (三大法人) daily net buy/sell.
    """
    dsitem = 'institution_trading'
    logger = common.get_logger('finance')
    datestr = trading_date.replace('-', '')

    # Cache handling
    if has_cache(dsitem, datestr):
        logger.info('載入 %s 的三大法人', datestr)
        payload = load_cache(dsitem, datestr)
    else:
        logger.info('沒有 %s 的三大法人', datestr)
        session = common.get_session(False)
        url = 'http://www.twse.com.tw/fund/T86?response=json&date=%s&selectType=ALL' % datestr
        payload = session.get(url).json()
        status = payload['stat']
        # NOTE: the endpoint answers HTTP 200 even on failure; the JSON
        # 'stat' field is the only success indicator.
        #   success: OK
        #   failure: 查詢日期大於可查詢最大日期,請重新查詢!
        #            很抱歉,目前線上人數過多,請您稍候再試
        if status != 'OK':
            logger.error('無法取得 %s 的三大法人資料, 原因: %s', datestr, status)
            return
        logger.info('儲存 %s 的三大法人', datestr)
        save_cache(dsitem, datestr, payload)

    # Import into SQLite
    db_conn = db.get_connection()
    sql = '''
    INSERT INTO `institution` (
        trading_date, security_id, security_name,
        foreign_trend, stic_trend, dealer_trend
    ) VALUES (?,?,?,?,?,?)
    '''
    for row in payload['data']:
        sec_id = row[0]
        sec_name = row[1].strip()
        # Integer-divide share counts by 1000 (presumably shares -> lots;
        # confirm against the TWSE T86 column definitions)
        trends = tuple(int(row[i].replace(',', '')) // 1000 for i in (4, 10, 11))
        db_conn.execute(sql, (trading_date, sec_id, sec_name) + trends)
        logger.debug('[%s %s] 外資: %s 投信: %s 自營商: %s',
                     sec_id, sec_name, *trends)
    db_conn.commit()
    db_conn.close()
def sync_dataset(dsitem, trading_date):
    """
    Shared workflow for synchronizing a dataset.

    NOTE(review): this redefines the earlier sync_dataset() in this module
    and is the version in effect at runtime — confirm the earlier one can
    be removed.
    """
    logger = common.get_logger('finance')
    datestr = trading_date.replace('-', '')
    fmt = 'csv' if dsitem == 'borrowed' else 'json'  # no builtin shadowing
    this_mod = sys.modules[__name__]

    if has_cache(dsitem, datestr, fmt):
        # Load the cached dataset
        logger.info('套用 %s 的 %s 快取', trading_date, dsitem)
        dataset = load_cache(dsitem, datestr, fmt)
    else:
        # Download the dataset with a bounded retry loop
        dataset = None
        repeat = 0
        hookfunc = getattr(this_mod, 'download_' + dsitem)
        while dataset is None and repeat < REPEAT_LIMIT:
            repeat += 1
            if repeat > 1:
                time.sleep(REPEAT_INTERVAL)
            try:
                logger.info('下載 %s 的 %s', trading_date, dsitem)
                dataset = hookfunc(datestr)
                logger.info('儲存 %s 的 %s', trading_date, dsitem)
                save_cache(dsitem, datestr, dataset, fmt)
            except Exception as ex:
                # 2019-08-08: retries here are not very effective; all 3
                # attempts tend to fail — consider another retry strategy.
                # Bug fix: ex.reason only exists on urllib-style errors;
                # on any other exception it raised AttributeError here.
                logger.error('無法取得 %s 的 %s (重試: %d, %s)',
                             trading_date, dsitem, repeat, ex)

    if dataset is None:
        return

    # Import into the database
    dbcon = db.get_connection()
    hookfunc = getattr(this_mod, 'import_' + dsitem)
    try:
        hookfunc(dbcon, trading_date, dataset)
        logger.info('匯入 %s 的 %s', trading_date, dsitem)
    except sqlite3.IntegrityError:
        logger.warning('已經匯入過 %s 的 %s', trading_date, dsitem)
    except Exception as ex:
        # TODO: ex.args[0] reliability is unconfirmed — verify
        logger.error('無法匯入 %s 的 %s (%s)', trading_date, dsitem, ex.args[0])
    dbcon.commit()
    dbcon.close()
def sync_short_selled(trading_date):
    """
    Synchronize outstanding short-sale (已借券賣出) balances.

    Updates rows previously inserted into `short_sell`; rows with no
    matching trading_date/security_id are silently left untouched.
    """
    dsitem = 'short_selled'
    logger = common.get_logger('finance')
    datestr = trading_date.replace('-', '')

    # Cache handling
    if not has_cache(dsitem, datestr):
        logger.info('沒有 %s 的已借券賣出', datestr)
        session = common.get_session(False)
        url = 'http://www.twse.com.tw/exchangeReport/TWT93U?response=json&date=%s' % datestr
        payload = session.get(url).json()
        # NOTE: the endpoint answers HTTP 200 even on failure; the JSON
        # 'stat' field is the only success indicator.
        #   success: OK
        #   failure: 查詢日期大於可查詢最大日期,請重新查詢!
        #            很抱歉,目前線上人數過多,請您稍候再試
        status = payload['stat']
        if status != 'OK':
            logger.error('無法取得 %s 的已借券賣出資料, 原因: %s', datestr, status)
            return
        if len(payload['data']) == 0:
            logger.error('尚未生成 %s 的已借券賣出資料, 可能尚未結算或非交易日', datestr)
            return
        logger.info('儲存 %s 的已借券賣出', datestr)
        save_cache(dsitem, datestr, payload)
    else:
        logger.info('載入 %s 的已借券賣出', datestr)
        payload = load_cache(dsitem, datestr)

    db_conn = db.get_connection()
    sql = '''
    UPDATE `short_sell` SET `security_name`=?, `selled`=?
    WHERE `trading_date`=? AND `security_id`=?
    '''
    for row in payload['data']:
        sec_id = row[0]
        sec_name = row[1].strip()
        balance = int(row[12].replace(',', '')) // 1000
        if sec_id != '':  # skip summary rows without a security id
            db_conn.execute(sql, (sec_name, balance, trading_date, sec_id))
            logger.debug('[%s %s] 已借券賣出餘額: %s', sec_id, sec_name, balance)
    db_conn.commit()
    db_conn.close()
def sync_block_trading(trading_date):
    """
    Synchronize block trading (鉅額交易) records for one trading day.
    """
    dsitem = 'block_trading'
    logger = common.get_logger('finance')
    datestr = trading_date.replace('-', '')

    # Cache handling
    if has_cache(dsitem, datestr):
        logger.info('載入 %s 的鉅額交易', datestr)
        ds = load_cache(dsitem, datestr)
    else:
        logger.info('沒有 %s 的鉅額交易', datestr)
        session = common.get_session(False)
        url = 'http://www.twse.com.tw/block/BFIAUU?response=json&date=%s&selectType=S' % datestr
        resp = session.get(url)
        ds = resp.json()
        status = ds['stat']
        # NOTE: the endpoint answers HTTP 200 even on failure; the JSON
        # 'stat' field is the only success indicator.
        #   success: OK
        #   failure: 查詢日期大於可查詢最大日期,請重新查詢!
        #            很抱歉,目前線上人數過多,請您稍候再試
        if status != 'OK':
            logger.error('無法取得 %s 的鉅額交易資料, 原因: %s', datestr, status)
            return
        if len(ds['data']) == 0:
            logger.error('沒有 %s 的鉅額交易資料, 可能尚未結算或是非交易日', datestr)
            return
        logger.info('儲存 %s 的鉅額交易', datestr)
        save_cache(dsitem, datestr, ds)

    db_conn = db.get_connection()
    sql = '''
    INSERT INTO `block` (
        trading_date, security_id, security_name,
        tick_rank, tick_type, close, volume, total
    ) VALUES (?,?,?,?,?,?,?,?)
    '''
    tick_rank = {}
    for trade in ds['data']:
        if trade[0] == '總計':  # stop at the grand-total row
            break
        security_id = trade[0]
        security_name = trade[1]
        tick_type = trade[2]
        close = float(trade[3].replace(',', ''))
        volume = int(trade[4].replace(',', ''))
        total = int(trade[5].replace(',', ''))
        # Running rank of this tick among the security's trades today
        tick_rank[security_id] = tick_rank.get(security_id, 0) + 1
        db_conn.execute(sql, (trading_date, security_id, security_name,
                              tick_rank[security_id], tick_type,
                              close, volume, total))
        # Consistency fix: lazy %-args instead of eager % formatting, so
        # the message is only built when DEBUG logging is enabled.
        logger.debug('[%s %s] #%d %s 成交價: %s 股數: %s 金額: %s',
                     security_id, security_name, tick_rank[security_id],
                     tick_type, close, volume, total)
    db_conn.commit()
    db_conn.close()
def sync_etf_net(trading_date):
    """
    Synchronize ETF net value and premium/discount data from
    https://mis.twse.com.tw/stock/data/all_etf.txt

    Response layout — "a1" is a list of fund groups (the final entry is
    an empty dict); each group's "msgArray" entries carry:
        a: security id        b: name
        c: units issued       d: change vs previous day
        e: close price        f: net value
        g: premium/discount rate (%)
        h: previous net value
        i: date               j: time
        k: ETF type (1~4)
    """
    dsitem = 'etf_net'
    logger = common.get_logger('finance')
    datestr = trading_date.replace('-', '')

    # Cache handling
    if has_cache(dsitem, datestr):
        logger.info('載入 %s 的 ETF 溢價率快取', datestr)
        payload = load_cache(dsitem, datestr)
    else:
        logger.info('沒有 %s 的 ETF 溢價率快取', datestr)
        session = common.get_session(False)
        payload = session.get('https://mis.twse.com.tw/stock/data/all_etf.txt').json()
        # NOTE(review): reads the date from the second group's first entry,
        # which assumes at least two non-empty groups — confirm against
        # the live feed.
        dsdate = payload['a1'][1]['msgArray'][0]['i']
        if datestr != dsdate:
            logger.info('無法取得 %s 的 ETF 溢價率資料', datestr)
            return
        logger.info('儲存 %s 的 ETF 溢價率快取', datestr)
        save_cache(dsitem, datestr, payload)

    # Flatten the groups into {security_id: entry}
    etf_dict = {}
    for group in payload['a1']:
        for etf in group.get('msgArray', []):
            etf_dict[etf['a']] = etf

    # Insert in ascending security-id order
    db_conn = db.get_connection()
    sql = '''
    INSERT INTO `etf_offset` (
        trading_date, security_id, security_name, close, net, offset
    ) VALUES (?,?,?,?,?,?)
    '''
    for sec_id in sorted(etf_dict):
        etf = etf_dict[sec_id]
        db_conn.execute(sql, (trading_date, etf['a'], etf['b'],
                              etf['e'], etf['f'], etf['g']))
        logger.debug('%s, %s, %s, %s%%', etf['a'], etf['b'], etf['f'], etf['g'])
    db_conn.commit()
    db_conn.close()