Example #1
def get(url):
    """Fetch url through the current proxy, rotating the proxy on failure."""
    global proxy

    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36'
    }
    retry = 0
    while retry < 10:
        if proxy is None:
            proxy = getProxy()
        try:
            res = requests.get(url,
                               headers=headers,
                               proxies={'https': f'https://{proxy}'},
                               timeout=5)
            if res.status_code != 200:
                proxy = None
                retry += 1
                logger.error("Load page failed!")
                continue

            data = decode_content(res)
            return data
        except Exception:
            proxy = None
            retry += 1
            logger.error("Load page failed!")
    return None
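
This snippet leans on getProxy(), decode_content() (Example #5), and a module-level logger defined elsewhere in the project. A minimal harness for trying it standalone might look like the following; the getProxy() stub and its address are purely illustrative:

import logging
import requests

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
proxy = None

def getProxy():
    # Hypothetical stub: the real project returns a fresh "host:port"
    # string from a proxy pool here.
    return '127.0.0.1:8080'

Because get() resets the module-level proxy to None on every failure, the next loop iteration automatically pulls a fresh address from getProxy().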
Example #2
    def init_table_daily(self):
        try:
            # date, open, high, low, close, volume (shares traded)
            sql = '''CREATE TABLE IF NOT EXISTS daily (
                     id INTEGER PRIMARY KEY AUTOINCREMENT,
                     date TEXT NOT NULL,
                     code TEXT NOT NULL,
                     name TEXT NOT NULL,
                     open REAL,
                     high REAL,
                     low REAL,
                     close REAL,
                     volume REAL NOT NULL,
                     stock_id INTEGER,
                     UNIQUE(date, code),
                     FOREIGN KEY(stock_id) REFERENCES stock(id) ON DELETE CASCADE
                     );'''
            self.cursor.execute(sql)
            self.commit()
        except Exception:
            logger.error(traceback.format_exc())


#cursor.execute("SELECT * FROM {} WHERE code=?".format(stock), ('2330',))
#cursor.execute("INSERT INTO {} VALUES(?, ?)".format(group), ('台積電', '2330'))
#cursor.execute("UPDATE {} SET alive=? WHERE code=?".format(group), (False, '2330'))
#cursor.execute("SELECT * FROM stock INNER JOIN daily ON stock.id = daily.stock_id WHERE stock.code = '2330';")
#cursor.execute("DELETE from stock where id=1")
Example #3
def update_stocks_raw_data_from_goodinfo():
    today = datetime.date.today()
    today = today.strftime('%m/%d')
    logger.info('today : %s' % today)

    try:
        if not os.path.exists(CONST.STOCK_LIST):
            get_stock_list_from_db()
        fs = open(CONST.STOCK_LIST)
        line = fs.readline()
        while line:
            line = line.strip('\n')
            target = os.path.join(RAWDIR, str(line))

            to_be_update = False
            if os.path.exists(target):
                with open(target, 'r') as f:
                    res = f.read()

                if res is not None:
                    soup = BeautifulSoup(res, "lxml")
                    date = soup.find('nobr',
                                     {'style': 'font-size:9pt;color:gray;'})
                    if not date:
                        to_be_update = True
                    else:
                        date = date.get_text().split()[1]
                        if date != today:
                            to_be_update = True
                else:
                    to_be_update = True
            else:
                to_be_update = True

            if to_be_update:
                logger.info('%s updating...' % str(line))
                target_url = GOODINFO_URL + str(line)

                while True:
                    #time.sleep(5)
                    try:
                        res = get(target_url)
                        # '瀏覽量異常' is Goodinfo's rate-limit warning page
                        if res is not None and '瀏覽量異常' not in res:
                            path = os.path.join(RAWDIR, str(line))
                            with open(path, "w") as f:
                                f.write(res)
                            break
                    except Exception:
                        logger.error(traceback.format_exc())
            else:
                logger.info('%s is ready' % str(line))
            line = fs.readline()
        fs.close()
    except Exception:
        logger.error(traceback.format_exc())
Example #4
def get_max_min_dy(stock_id, online=False):
    try:
        res = None
        if online:
            target_url = GOODINFO_URL + str(stock_id)
            res = get(target_url)
        else:
            target = os.path.join(RAWDIR, str(stock_id))
            if os.path.exists(target):
                with open(target, 'r') as f:
                    res = f.read()

        if res is not None:
            if online:
                path = os.path.join(RAWDIR, str(stock_id))
                with open(path, "w") as f:
                    f.write(res)
            soup = BeautifulSoup(res, "lxml")
            #logger.info(soup.prettify())
            tbls = soup.find_all(
                'table', {
                    'class': 'solid_1_padding_4_0_tbl',
                    'width': '100%',
                    'bgcolor': '#d2d2d2'
                })
            for tbl in tbls:
                trs = tbl.find_all('tr')
                logger.info('%4s  %6s  %6s  %6s  %6s' %
                            ("Year", "High", "Low", "Avg", "Yield"))
                valid = CONST.CURRENT_YEAR
                for i in range(4, len(trs)):
                    #logger.info(trs[i])
                    tds = trs[i].find_all('td')
                    if len(tds) >= 19:
                        year = tds[12].get_text()
                        high = tds[13].get_text()  # renamed from max/min: don't shadow builtins
                        low = tds[14].get_text()
                        average = tds[15].get_text()
                        dy = tds[18].get_text()
                        if year.isdigit() and int(year) == valid:
                            logger.info('%6s  %8s  %8s  %8s  %8s' %
                                        (year, high, low, average, dy))
                            valid -= 1
                            valid -= 1
    except Exception:
        logger.error(traceback.format_exc())
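
Assuming the raw page for a stock is already cached under RAWDIR (see Example #3), the function can be driven offline or online; '2330' below is the TSMC code also used in the commented queries of Example #2:

get_max_min_dy('2330')               # offline: parse the cached raw page
get_max_min_dy('2330', online=True)  # fetch a fresh page from Goodinfo and re-cache it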
Example #5
def decode_content(data):
    txt = None
    # Detect the encoding of the response's raw bytes with chardet
    det = chardet.detect(data.content)
    try:
        # Trust the detection only when its confidence exceeds 0.5
        if det['confidence'] > 0.5:
            #logger.info("encoding: %s" % det['encoding'])
            if det['encoding'] in ('big-5', 'Big5'):
                # cp950 is Microsoft's superset of Big5
                txt = data.content.decode('cp950')
            else:
                txt = data.content.decode(det['encoding'])
        else:
            # Low confidence: fall back to UTF-8
            txt = data.content.decode('utf-8')
    except Exception:
        logger.error(traceback.format_exc())
    return txt
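
decode_content() expects a requests response object, so a minimal usage sketch (assuming the requests and chardet packages are installed) looks like:

import chardet
import requests

res = requests.get('https://example.com', timeout=5)
text = decode_content(res)  # returns a str, or None if decoding failed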
Example #6
def add_daily(date, info):
    # use parameter binding instead of f-strings to avoid SQL injection
    db.cursor.execute("SELECT * FROM daily WHERE date=? AND code=?",
                      (date, info.code))
    result = db.cursor.fetchone()
    if result is not None:
        logger.error(f"The data is duplicated: {info}")
        return

    db.cursor.execute("SELECT id FROM stock WHERE code=?", (info.code,))
    result = db.cursor.fetchone()
    if result is not None:
        stock_id = result[0]
        if info.volume != 0:
            db.cursor.execute(
                "INSERT INTO daily(date, code, name, open, high, low, close, volume, stock_id) "
                "VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (date, info.code, info.name, info.open, info.high, info.low,
                 info.close, info.volume, stock_id))
        else:
            db.cursor.execute(
                "INSERT INTO daily(date, code, name, volume, stock_id) "
                "VALUES(?, ?, ?, ?, ?)",
                (date, info.code, info.name, info.volume, stock_id))
        db.commit()
    else:
        logger.error(f"Cannot find stock for {info}")
Example #7
def update_listed_company(url=None, static_data=None):
    if url is not None:
        data = get(url)
    else:
        data = static_data
    if data is not None:
        # pandas returns one DataFrame per HTML table; take the first
        df = pd.read_html(data)[0]
        #                       0                    1           2    3     4        5    6
        #0          有價證券代號及名稱  國際證券辨識號碼(ISIN Code)         上市日  市場別   產業別  CFICode   備註
        #1                 股票                   股票          股票   股票    股票       股票   股票
        #2            1101 台泥         TW0001101004  1962/02/09   上市  水泥工業   ESVUFR  NaN
        #3            1102 亞泥         TW0001102002  1962/06/08   上市  水泥工業   ESVUFR  NaN
        #df.columns: Int64Index([0, 1, 2, 3, 4, 5, 6], dtype='int64')

        #with open('test', 'a', encoding='UTF-8') as f:
        #    f.write(df.to_string(header = False, index = False))
        for index, row in df.iterrows():
            if row[3] in CONST.VALID_LISTED_TYPE and row[5] in CONST.VALID_CFICode:
                add_stock(row)
    else:
        logger.error(f"Cannot get data from {url}")
Example #8
def update_data_by_day():
    date = '20200514'  # NOTE: hard-coded sample date (yyyymmdd)

    url = 'https://www.twse.com.tw/exchangeReport/MI_INDEX?' + \
          'response=json&' + \
          'type=ALLBUT0999&' + \
          'date=' + str(date)
    data = get(url)
    if data is not None:
        data = json.loads(data)
        stat = data['stat']
        # a stat other than 'OK' means no data exists for the queried date
        if stat != 'OK':
            logger.warning(f'There is no data at {date}')
            return

        #"fields9": ["證券代號", "證券名稱", "成交股數", "成交筆數", "成交金額", "開盤價", "最高價", "最低價", "收盤價", "漲跌(+/-)", "漲跌價差", "最後揭示買價", "最後揭示買量", "最後揭示賣價", "最後揭示賣量", "本益比"]
        #"data9": [["0050", "元大台灣50", "10,748,771", "4,277", "881,536,222", "81.45", "82.20", "81.40", "82.15", "+", "0.95", "82.15", "359", "82.20", "691", "0.00"],...
        # the table we need is the ninth one ('data9')
        records = data['data9']

        for record in records:
            try:
                code = record[0].strip()
                name = record[1].strip()
                volume = record[2].replace(',', '').strip()
                # renamed from open: don't shadow the builtin
                open_price = record[5].replace(',', '').strip()
                high = record[6].replace(',', '').strip()
                low = record[7].replace(',', '').strip()
                close = record[8].replace(',', '').strip()
                info = AfterHoursInfo(code, name, volume, open_price, high,
                                      low, close)
                add_daily(date, info)
            except Exception:
                logger.error(traceback.format_exc())
    else:
        logger.error(f"Cannot get data from {url}")
Example #9
    def init_table_stock(self):
        try:
            sql = '''CREATE TABLE IF NOT EXISTS stock (
                     id INTEGER PRIMARY KEY AUTOINCREMENT,
                     code TEXT NOT NULL UNIQUE,
                     name TEXT NOT NULL UNIQUE,
                     listed_day TEXT NOT NULL,
                     listed_type INTEGER NOT NULL,
                     CFICode TEXT NOT NULL,
                     listed BOOLEAN DEFAULT True
                     );'''
            self.cursor.execute(sql)

            sql = (
                "CREATE INDEX IF NOT EXISTS index_stock_code ON stock (code);")
            self.cursor.execute(sql)

            sql = (
                "CREATE INDEX IF NOT EXISTS index_stock_name ON stock (name);")
            self.cursor.execute(sql)

            self.commit()
        except Exception:
            logger.error(traceback.format_exc())
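
One caveat that applies to both table definitions: SQLite ships with FOREIGN KEY enforcement disabled, so daily's ON DELETE CASCADE (Example #2) does nothing unless each connection turns it on. A minimal connection setup, with a hypothetical database path:

import sqlite3

conn = sqlite3.connect('stocks.db')       # hypothetical path
conn.execute('PRAGMA foreign_keys = ON')  # off by default in SQLite
cursor = conn.cursor()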
Example #10
File: main.py  Project: qcycp/stocks
    while True:
        try:
            print("1) 查詢歷年股價及殖利率")
            print("2) 更新股票清單 from TWSE")
            print("3) 更新歷年股價資料 from Goodinfo")
            print("4) 篩選波動穩動的最低股價清單")
            print("5) 更新當日股價")
            print("q) 離開")
            op = input('請選取操作: ')
            if op == '1':
                while True:
                    sid = input('Enter a stock ID (q to return to the menu): ')
                    if sid == 'q':
                        break
                    get_max_min_dy(sid)
            elif op == '2':
                update_stocks()
            elif op == '3':
                update_stocks_raw_data_from_goodinfo()
            elif op == '4':
                get_effective_tracking_list()
            elif op == '5':
                update_data_by_day()
            elif op == 'Q' or op == 'q':
                break
            else:
                print("輸入錯誤")
        except Exception:
            logger.error(traceback.format_exc())
    db.close()
Example #11
def get_effective_tracking_list(now=True):
    TRACKING_YEARS = 5
    VOLUME_THRESHOLD = 250
    STD_THRESHOLD = 2
    PROFIT_THRESHOLD = 0.3
    READY_TO_BUY_THRESHOLD = 0.1
    VOLATILITY_THRESHOLD = 0.25
    try:
        if not os.path.exists(CONST.STOCK_LIST):
            get_stock_list_from_db()
        # 'rU' mode was removed in Python 3.11; plain 'r' handles newlines fine
        with open(CONST.STOCK_LIST) as f:
            total = len(f.readlines())

        fs = open(CONST.STOCK_LIST)
        count = 1
        line = fs.readline()
        while line:
            if count == total:
                percent = 100.0
                print('Searching... %s [%d/%d]' %
                      (str(percent) + '%', count, total),
                      end='\n')
            else:
                percent = round(1.0 * count / total * 100, 2)
                print('Searching... %.02f%% [%d/%d]' % (percent, count, total),
                      end='\r')

            line = line.strip('\n')
            target = os.path.join(RAWDIR, str(line))

            res = None
            if os.path.exists(target):
                with open(target, 'r') as f:
                    res = f.read()

            # '瀏覽量異常' is Goodinfo's rate-limit warning page
            if res is not None and '瀏覽量異常' not in res:
                soup = BeautifulSoup(res, "lxml")
                volume = get_volume(soup)
                # condition 1: trading volume > 250
                if volume > VOLUME_THRESHOLD:
                    max_list, min_list = get_max_min_list(soup)

                    # condition 2: at least 5 years of history
                    if len(max_list) >= TRACKING_YEARS:
                        max_list = max_list[:TRACKING_YEARS]
                        min_list = min_list[:TRACKING_YEARS]

                        # sample standard deviation
                        #std = np.std(np.array(min_list), ddof=1)
                        # condition 3 (old): sample std of the yearly lows < 2
                        #if std < STD_THRESHOLD:

                        volatility = (max(min_list) -
                                      min(min_list)) / np.mean(min_list)
                        #logger.info('%s : %s' % (line, volatility))
                        # condition 3: the yearly lows have low volatility
                        if volatility < VOLATILITY_THRESHOLD:
                            avg_max = np.mean(max_list)
                            avg_min = np.mean(min_list)
                            # condition 4: average high exceeds average low by > 30%
                            if (avg_max -
                                    avg_min) / avg_min > PROFIT_THRESHOLD:
                                if now:
                                    close = get_close(soup)
                                    # entry filter: current close is no more than 10% above the average low
                                    if (close - avg_min
                                        ) / avg_min < READY_TO_BUY_THRESHOLD:
                                        logger.info('%s' % line)
                                else:
                                    logger.info('%s' % line)

            line = fs.readline()
            count += 1
        fs.close()
    except Exception:
        logger.error(traceback.format_exc())
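
To make the screening thresholds concrete, here is a worked check with made-up numbers (illustrative only, not real market data):

import numpy as np

# hypothetical yearly lows and highs over TRACKING_YEARS = 5
min_list = [20.0, 21.0, 22.0, 21.5, 20.5]
max_list = [28.0, 29.0, 30.0, 28.5, 27.5]

volatility = (max(min_list) - min(min_list)) / np.mean(min_list)
# (22.0 - 20.0) / 21.0 ≈ 0.095 -> passes VOLATILITY_THRESHOLD (0.25)

profit = (np.mean(max_list) - np.mean(min_list)) / np.mean(min_list)
# (28.6 - 21.0) / 21.0 ≈ 0.362 -> passes PROFIT_THRESHOLD (0.3)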