Exemple #1
0
def rebuild_dist():
    """
    Rebuild the shareholding-distribution database.

    Deletes all rows from tables level01..level17, VACUUMs the database,
    then re-imports every cached TDCC CSV file in chronological order.
    """
    # Clear existing data from all 17 distribution-level tables.
    db_conn = db.get_connection()
    for level in range(1, 18):
        sql = 'DELETE FROM level%02d;' % level
        db_conn.execute(sql)
    db_conn.commit()
    db_conn.execute('VACUUM')  # cannot VACUUM from within a transaction
    db_conn.close()

    # Collect the dates that can be rebuilt from the cache directory.
    csv_dir = common.get_cache_dir('tdcc')
    date_list = []
    for filename in os.listdir(csv_dir):
        # Fixed: escape the dots and anchor the end so unrelated filenames
        # (the old pattern's '.' matched any character) cannot slip through.
        match = re.match(r'dist-(\d{8})\.csv\.xz$', filename)
        if match is not None:
            date_list.append(match.group(1))

    # Re-import data in chronological order.
    date_list.sort()
    for csv_date in date_list:
        import_dist(csv_date)
Exemple #2
0
def sync_short_borrowed(trading_date):
    """
    Sync the "borrowable for short sale" (可借券賣出) dataset.

    - TWSE only publishes the current day's data.
    - TODO: confirm the time at which the source switches to a new day.

    :param trading_date: ISO date string, e.g. '2019-05-29'
    """
    dsitem = 'short_borrowed'
    logger = common.get_logger('finance')
    datestr = trading_date.replace('-', '')

    if not has_cache(dsitem, datestr, 'csv'):
        # Download today's CSV and verify its embedded date matches the
        # requested date before caching it.
        session = common.get_session(False)
        url = 'http://www.twse.com.tw/SBL/TWT96U?response=csv'
        resp = session.get(url)
        ds = resp.text
        # The first CSV line carries a ROC-era date, e.g. 108年05月29日.
        line1 = ds[:ds.find('\r\n')]
        match = re.search(r'(\d{3})年(\d{2})月(\d{2})日', line1)
        if match is None:
            logger.error('可借券賣出的 CSV 無法取得日期字串')
            return
        yy = int(match.group(1)) + 1911  # ROC year -> Gregorian year
        mm = match.group(2)
        dd = match.group(3)
        dsdate = '%04d%s%s' % (yy, mm, dd)
        if dsdate != datestr:
            logger.error('可借券賣出的資料日期與指定日期不同, 資料日期 %s, 指定日期 %s', dsdate,
                         datestr)
            return
        # Fixed: this is a normal progress message, not an error condition.
        logger.info('可借券賣出的資料寫入快取: %s', datestr)
        save_cache(dsitem, datestr, ds, 'csv')

    db_conn = db.get_connection()
    sql = '''
        INSERT INTO `short_sell` (
            trading_date, security_id, borrowed
        ) VALUES (?,?,?)
    '''
    logger.info('載入 %s 的可借券賣出', datestr)
    csv_path = get_cache_path(dsitem, datestr, 'csv')
    # The CSV lays out two securities per row; the 5th column is layout junk.
    col_names = ['sec1', 'vol1', 'sec2', 'vol2', 'shit']
    df = pandas.read_csv(csv_path,
                         sep=',',
                         skiprows=3,
                         header=None,
                         names=col_names)
    cnt = 0
    for index, row in df.iterrows():
        # Security ids are quoted as ="XXXX" in the CSV; strip the wrapper.
        # NOTE(review): assumes pandas reads the volume columns as strings
        # (comma-grouped numbers) — confirm against a real cache file.
        security_id = row['sec1'].strip('="')
        borrowed = int(row['vol1'].replace(',', ''))
        db_conn.execute(sql, (trading_date, security_id, borrowed))
        cnt += 1
        security_id = row['sec2'].strip('="')
        # '_' marks an empty right-hand cell (odd number of securities).
        if security_id != '_':
            borrowed = int(row['vol2'].replace(',', ''))
            db_conn.execute(sql, (trading_date, security_id, borrowed))
            cnt += 1
    db_conn.commit()
    db_conn.close()
Exemple #3
0
def sync_dataset(dsitem, trading_date):
    """
    Shared workflow for syncing a dataset.

    * HTTP date format: 108/05/29 (ROC era)
    * DB / cache date format: 2019-05-29

    :param dsitem: dataset name; download_<dsitem> and import_<dsitem>
                   hook functions must exist in this module
    :param trading_date: ISO date string, e.g. '2019-05-29'
    """
    logger = common.get_logger('finance')
    dtm = re.match(r'(\d{4})-(\d{2})-(\d{2})', trading_date)
    if dtm is None:
        # Guard: a malformed date previously crashed on dtm.group().
        logger.error('日期格式錯誤: %s', trading_date)
        return
    # Convert to the ROC-era date format used by the HTTP endpoints.
    tokens = [str(int(dtm.group(1)) - 1911), dtm.group(2), dtm.group(3)]
    datestr = '/'.join(tokens)
    cache_format = 'json'  # renamed from `format` to avoid shadowing the builtin
    this_mod = sys.modules[__name__]

    if has_cache(dsitem, trading_date, cache_format):
        # Load the cached dataset.
        logger.info('套用 %s 的 %s 快取', trading_date, dsitem)
        dataset = load_cache(dsitem, trading_date, cache_format)
    else:
        # Download the dataset, retrying up to REPEAT_LIMIT times.
        dataset = None
        repeat = 0
        hookfunc = getattr(this_mod, 'download_' + dsitem)
        while dataset is None and repeat < REPEAT_LIMIT:
            repeat += 1
            if repeat > 1:
                time.sleep(REPEAT_INTERVAL)
            try:
                logger.info('下載 %s 的 %s', trading_date, dsitem)
                dataset = hookfunc(datestr)
                logger.info('儲存 %s 的 %s', trading_date, dsitem)
                save_cache(dsitem, trading_date, dataset, cache_format)
            except Exception as ex:
                # Fixed: generic exceptions have no `.reason` attribute, so
                # the old code raised AttributeError inside this handler.
                logger.error('無法取得 %s 的 %s (重試: %d, %s)', trading_date, dsitem,
                             repeat, ex)

    if dataset is None:
        return

    # Import into the database.
    dbcon = db.get_connection()
    hookfunc = getattr(this_mod, 'import_' + dsitem)  # fixed duplicated assignment
    try:
        hookfunc(dbcon, trading_date, dataset)
        logger.info('匯入 %s 的 %s', trading_date, dsitem)
    except sqlite3.IntegrityError:
        logger.warning('已經匯入過 %s 的 %s', trading_date, dsitem)
    except Exception as ex:
        # TODO: ex.args[0] may not be reliable; needs further verification.
        logger.error('無法匯入 %s 的 %s (%s)', trading_date, dsitem, ex.args[0])
    dbcon.commit()
    dbcon.close()
Exemple #4
0
def import_dist(csv_date='latest'):
    """
    Import the shareholding-distribution table for a given date into the DB.

    :param csv_date: date in YYYYMMDD form, or 'latest' to pick the most
                     recent cached CSV file
    """
    logger = common.get_logger('finance')

    if csv_date == 'latest':
        # Pick the newest dist-YYYYMMDD.csv in the cache directory.
        max_date = ''
        cache_dir = os.path.expanduser('~/.twnews/cache/tdcc')  # renamed from `dir` (builtin)
        for filename in os.listdir(cache_dir):
            # Fixed: escape the dot and anchor the end so compressed
            # variants (dist-YYYYMMDD.csv.xz) no longer prefix-match.
            match = re.match(r'dist-(\d{8})\.csv$', filename)
            if match is not None:
                if max_date < match.group(1):
                    max_date = match.group(1)
        csv_date = max_date

    # YYYYMMDD -> YYYY-MM-DD for the trading_date column.
    iso_date = re.sub(r'(\d{4})(\d{2})(\d{2})', r'\1-\2-\3', csv_date)
    csv_path = os.path.expanduser('~/.twnews/cache/tdcc/dist-%s.csv' % csv_date)
    if not os.path.isfile(csv_path):
        logger.error('沒有這個日期的股權分散表檔案: %s', csv_path)
        return

    db_conn = db.get_connection()
    col_names = [
        'trading_date',
        'security_id',
        'level',
        'numof_holders',
        'numof_stocks',
        'percentof_stocks'
    ]
    df = pandas.read_csv(csv_path, skiprows=1, header=None, names=col_names)
    # One table per distribution level: level01..level17.
    sql_template = '''
    INSERT INTO level%02d (
        trading_date, security_id,
        numof_holders, numof_stocks, percentof_stocks
    ) VALUES (?,?,?,?,?);
    '''
    for index, row in df.iterrows():
        sql = sql_template % row['level']
        db_conn.execute(sql, (
            iso_date,
            row['security_id'],
            row['numof_holders'],
            row['numof_stocks'],
            row['percentof_stocks']
        ))
        # Progress log every 5000 rows.
        if index > 0 and index % 5000 == 0:
            msg = '已儲存 %s 的 %d 筆股權分散資料' % (iso_date, index)
            logger.debug(msg)
    db_conn.commit()
    db_conn.close()
Exemple #5
0
def sync_margin_trading(trading_date):
    """
    Sync margin-trading (融資融券) balances for one trading day.

    :param trading_date: ISO date string, e.g. '2019-05-29'
    """
    dsitem = 'margin_trading'
    logger = common.get_logger('finance')
    datestr = trading_date.replace('-', '')

    # Cache handling: load from cache, otherwise download and cache.
    if has_cache(dsitem, datestr):
        logger.info('載入 %s 的融資融券', datestr)
        ds = load_cache(dsitem, datestr)
    else:
        logger.info('沒有 %s 的融資融券', datestr)
        session = common.get_session(False)
        url = 'http://www.twse.com.tw/exchangeReport/MI_MARGN?response=json&date=%s&selectType=ALL' % datestr
        resp = session.get(url)
        ds = resp.json()
        status = ds['stat']
        # Note: TWSE answers HTTP 200 even on failure; success/failure must
        # be judged from the JSON 'stat' field.
        # Success: OK
        # Failure: 查詢日期大於可查詢最大日期,請重新查詢!
        #          很抱歉,目前線上人數過多,請您稍候再試
        if status != 'OK':
            logger.error('無法取得 %s 的融資融券資料, 原因: %s', datestr, status)
            return
        if len(ds['data']) == 0:
            logger.error('沒有 %s 的融資融券資料, 可能尚未結算或是非交易日', datestr)
            return
        logger.info('儲存 %s 的融資融券', datestr)
        save_cache(dsitem, datestr, ds)

    db_conn = db.get_connection()
    sql = '''
        INSERT INTO `margin` (
            trading_date, security_id, security_name,
            buying_balance, selling_balance
        ) VALUES (?,?,?,?,?)
    '''
    for detail in ds['data']:
        security_id = detail[0]
        security_name = detail[1].strip()
        buying_balance = int(detail[6].replace(',', ''))
        selling_balance = int(detail[12].replace(',', ''))
        db_conn.execute(sql, (trading_date, security_id, security_name,
                              buying_balance, selling_balance))
        # Fixed: lazy %-args instead of eager '%' formatting, matching the
        # logging style used by the other sync functions in this module.
        logger.debug('[%s %s] 融資餘額: %s, 融券餘額: %s', security_id,
                     security_name, buying_balance, selling_balance)
    db_conn.commit()
    db_conn.close()
Exemple #6
0
def sync_institution_trading(trading_date):
    """
    Sync the three institutional investors' (三大法人) daily net trading.

    :param trading_date: ISO date string, e.g. '2019-05-29'
    """
    dsitem = 'institution_trading'
    logger = common.get_logger('finance')
    datestr = trading_date.replace('-', '')

    # Prefer the cached dataset; download and cache on a miss.
    if has_cache(dsitem, datestr):
        logger.info('載入 %s 的三大法人', datestr)
        ds = load_cache(dsitem, datestr)
    else:
        logger.info('沒有 %s 的三大法人', datestr)
        session = common.get_session(False)
        url = 'http://www.twse.com.tw/fund/T86?response=json&date=%s&selectType=ALL' % datestr
        ds = session.get(url).json()
        status = ds['stat']
        # TWSE returns HTTP 200 even on failure; the JSON 'stat' field is
        # the real status ('OK' on success, an error sentence otherwise,
        # e.g. 查詢日期大於可查詢最大日期,請重新查詢! or
        # 很抱歉,目前線上人數過多,請您稍候再試).
        if status != 'OK':
            logger.error('無法取得 %s 的三大法人資料, 原因: %s', datestr, status)
            return
        logger.info('儲存 %s 的三大法人', datestr)
        save_cache(dsitem, datestr, ds)

    def to_lots(text):
        # Convert a comma-grouped share count into thousand-share lots.
        return int(text.replace(',', '')) // 1000

    # Import into SQLite.
    db_conn = db.get_connection()
    sql = '''
        INSERT INTO `institution` (
            trading_date, security_id, security_name,
            foreign_trend, stic_trend, dealer_trend
        ) VALUES (?,?,?,?,?,?)
    '''
    for record in ds['data']:
        sec_id = record[0]
        sec_name = record[1].strip()
        trends = (to_lots(record[4]), to_lots(record[10]), to_lots(record[11]))
        db_conn.execute(sql, (trading_date, sec_id, sec_name) + trends)
        logger.debug('[%s %s] 外資: %s 投信: %s 自營商: %s', sec_id,
                     sec_name, trends[0], trends[1], trends[2])
    db_conn.commit()
    db_conn.close()
Exemple #7
0
def sync_dataset(dsitem, trading_date):
    """
    Shared workflow for syncing a dataset.

    :param dsitem: dataset name; download_<dsitem> and import_<dsitem>
                   hook functions must exist in this module
    :param trading_date: ISO date string, e.g. '2019-05-29'
    """
    logger = common.get_logger('finance')
    datestr = trading_date.replace('-', '')
    # Renamed from `format` to avoid shadowing the builtin; only the
    # 'borrowed' dataset is cached as CSV, everything else as JSON.
    cache_format = 'csv' if dsitem == 'borrowed' else 'json'
    this_mod = sys.modules[__name__]

    if has_cache(dsitem, datestr, cache_format):
        # Load the cached dataset.
        logger.info('套用 %s 的 %s 快取', trading_date, dsitem)
        dataset = load_cache(dsitem, datestr, cache_format)
    else:
        # Download the dataset, retrying up to REPEAT_LIMIT times.
        dataset = None
        repeat = 0
        hookfunc = getattr(this_mod, 'download_' + dsitem)
        while dataset is None and repeat < REPEAT_LIMIT:
            repeat += 1
            if repeat > 1:
                time.sleep(REPEAT_INTERVAL)
            try:
                logger.info('下載 %s 的 %s', trading_date, dsitem)
                dataset = hookfunc(datestr)
                logger.info('儲存 %s 的 %s', trading_date, dsitem)
                save_cache(dsitem, datestr, dataset, cache_format)
            except Exception as ex:
                # 2019-08-08: retries here are not very effective; all 3
                # attempts tend to fail — a different retry mechanism may
                # be needed.
                # Fixed: generic exceptions have no `.reason` attribute, so
                # the old code raised AttributeError inside this handler.
                logger.error('無法取得 %s 的 %s (重試: %d, %s)', trading_date, dsitem,
                             repeat, ex)

    if dataset is None:
        return

    # Import into the database.
    dbcon = db.get_connection()
    hookfunc = getattr(this_mod, 'import_' + dsitem)  # fixed duplicated assignment
    try:
        hookfunc(dbcon, trading_date, dataset)
        logger.info('匯入 %s 的 %s', trading_date, dsitem)
    except sqlite3.IntegrityError:
        logger.warning('已經匯入過 %s 的 %s', trading_date, dsitem)
    except Exception as ex:
        # TODO: ex.args[0] may not be reliable; needs further verification.
        logger.error('無法匯入 %s 的 %s (%s)', trading_date, dsitem, ex.args[0])
    dbcon.commit()
    dbcon.close()
Exemple #8
0
def sync_short_selled(trading_date):
    """
    Sync shares already sold short via securities lending (已借券賣出).

    :param trading_date: ISO date string, e.g. '2019-05-29'
    """
    dsitem = 'short_selled'
    logger = common.get_logger('finance')
    datestr = trading_date.replace('-', '')

    # Cache first; fall back to downloading from TWSE.
    if not has_cache(dsitem, datestr):
        logger.info('沒有 %s 的已借券賣出', datestr)
        session = common.get_session(False)
        url = 'http://www.twse.com.tw/exchangeReport/TWT93U?response=json&date=%s' % datestr
        ds = session.get(url).json()
        status = ds['stat']
        # TWSE returns HTTP 200 even on failure; the JSON 'stat' field is
        # the real status ('OK' on success, an error sentence otherwise,
        # e.g. 查詢日期大於可查詢最大日期,請重新查詢! or
        # 很抱歉,目前線上人數過多,請您稍候再試).
        if status != 'OK':
            logger.error('無法取得 %s 的已借券賣出資料, 原因: %s', datestr, status)
            return
        if len(ds['data']) == 0:
            logger.error('尚未生成 %s 的已借券賣出資料, 可能尚未結算或非交易日', datestr)
            return
        logger.info('儲存 %s 的已借券賣出', datestr)
        save_cache(dsitem, datestr, ds)
    else:
        logger.info('載入 %s 的已借券賣出', datestr)
        ds = load_cache(dsitem, datestr)

    # Rows were inserted by the borrowed-side sync; fill in name and
    # sold-short balance for each security.
    db_conn = db.get_connection()
    sql = '''
        UPDATE `short_sell` SET `security_name`=?, `selled`=?
        WHERE `trading_date`=? AND `security_id`=?
    '''
    for record in ds['data']:
        sec_id = record[0]
        sec_name = record[1].strip()
        # Thousand-share lots from a comma-grouped share count.
        lots = int(record[12].replace(',', '')) // 1000
        if sec_id == '':
            continue
        db_conn.execute(sql, (sec_name, lots, trading_date, sec_id))
        logger.debug('[%s %s] 已借券賣出餘額: %s', sec_id, sec_name,
                     lots)
    db_conn.commit()
    db_conn.close()
Exemple #9
0
def sync_block_trading(trading_date):
    """
    Sync block-trading (鉅額交易) records for one trading day.

    :param trading_date: ISO date string, e.g. '2019-05-29'
    """
    dsitem = 'block_trading'
    logger = common.get_logger('finance')
    datestr = trading_date.replace('-', '')

    # Cache handling: load from cache, otherwise download and cache.
    if has_cache(dsitem, datestr):
        logger.info('載入 %s 的鉅額交易', datestr)
        ds = load_cache(dsitem, datestr)
    else:
        logger.info('沒有 %s 的鉅額交易', datestr)
        session = common.get_session(False)
        url = 'http://www.twse.com.tw/block/BFIAUU?response=json&date=%s&selectType=S' % datestr
        resp = session.get(url)
        ds = resp.json()
        status = ds['stat']
        # Note: TWSE returns HTTP 200 even on failure; success/failure must
        # be judged from the JSON 'stat' field.
        # Success: OK
        # Failure: 查詢日期大於可查詢最大日期,請重新查詢!
        #          很抱歉,目前線上人數過多,請您稍候再試
        if status != 'OK':
            logger.error('無法取得 %s 的鉅額交易資料, 原因: %s', datestr, status)
            return
        if len(ds['data']) == 0:
            logger.error('沒有 %s 的鉅額交易資料, 可能尚未結算或是非交易日', datestr)
            return
        logger.info('儲存 %s 的鉅額交易', datestr)
        save_cache(dsitem, datestr, ds)

    db_conn = db.get_connection()
    sql = '''
        INSERT INTO `block` (
            trading_date, security_id, security_name,
            tick_rank, tick_type,
            close, volume, total
        ) VALUES (?,?,?,?,?,?,?,?)
    '''
    # Per-security occurrence counter: 1st, 2nd, ... tick of that security.
    tick_rank = {}
    for trade in ds['data']:
        # The data section ends with a grand-total row.
        if trade[0] == '總計':
            break
        security_id = trade[0]
        security_name = trade[1]
        tick_type = trade[2]
        close = float(trade[3].replace(',', ''))
        volume = int(trade[4].replace(',', ''))
        total = int(trade[5].replace(',', ''))
        # Idiom: dict.get with a default replaces the if/else counter.
        tick_rank[security_id] = tick_rank.get(security_id, 0) + 1
        db_conn.execute(
            sql, (trading_date, security_id, security_name,
                  tick_rank[security_id], tick_type, close, volume, total))
        # Fixed: lazy %-args instead of eager '%' formatting, matching the
        # logging style used by the other sync functions in this module.
        logger.debug('[%s %s] #%d %s 成交價: %s 股數: %s 金額: %s',
                     security_id, security_name, tick_rank[security_id],
                     tick_type, close, volume, total)
    db_conn.commit()
    db_conn.close()
Exemple #10
0
def sync_etf_net(trading_date):
    """
    Sync ETF premium/discount rates.

    Source: https://mis.twse.com.tw/stock/data/all_etf.txt
    The payload is {"a1": [group, ...]} where each group (except a trailing
    empty {}) carries a "msgArray" list of ETF entries with one-letter keys:
      a = code            b = name
      c = units issued    d = change vs. previous day
      e = close price     f = net value
      g = premium rate    h = previous net value
      i = date            j = time
      k = ETF type (1~4)
    plus group metadata such as refURL, userDelay, rtMessage, rtCode.

    :param trading_date: ISO date string, e.g. '2019-05-29'
    """
    dsitem = 'etf_net'
    logger = common.get_logger('finance')
    datestr = trading_date.replace('-', '')

    # Cache handling: load from cache, otherwise download and cache only
    # when the source's embedded date matches the requested date.
    if has_cache(dsitem, datestr):
        logger.info('載入 %s 的 ETF 溢價率快取', datestr)
        ds = load_cache(dsitem, datestr)
    else:
        logger.info('沒有 %s 的 ETF 溢價率快取', datestr)
        session = common.get_session(False)
        ds = session.get('https://mis.twse.com.tw/stock/data/all_etf.txt').json()
        # Date sampled from the second group's first entry.
        dsdate = ds['a1'][1]['msgArray'][0]['i']
        if datestr != dsdate:
            logger.info('無法取得 %s 的 ETF 溢價率資料', datestr)
            return
        logger.info('儲存 %s 的 ETF 溢價率快取', datestr)
        save_cache(dsitem, datestr, ds)

    # Flatten the source groups into a dict keyed by ETF code.
    etf_dict = {
        etf['a']: etf
        for fund in ds['a1'] if 'msgArray' in fund
        for etf in fund['msgArray']
    }

    # Insert in ascending security-code order.
    db_conn = db.get_connection()
    sql = '''
        INSERT INTO `etf_offset` (
            trading_date, security_id, security_name,
            close, net, offset
        ) VALUES (?,?,?,?,?,?)
    '''
    for code in sorted(etf_dict):
        etf = etf_dict[code]
        row = (trading_date, etf['a'], etf['b'], etf['e'], etf['f'], etf['g'])
        db_conn.execute(sql, row)
        logger.debug('%s, %s, %s, %s%%', etf['a'], etf['b'], etf['f'],
                     etf['g'])
    db_conn.commit()
    db_conn.close()