def get(url):
    """Fetch *url* through the shared rotating proxy.

    Retries up to 10 times, discarding the current proxy (so ``getProxy``
    supplies a fresh one) on any HTTP/network failure or non-200 status.
    Returns the decoded page text on success, or None when all attempts fail.
    Relies on module-level helpers: ``getProxy``, ``decode_content``,
    ``logger`` and the ``proxy`` global.
    """
    global proxy
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36'
    }
    retry = 0
    while retry < 10:
        if proxy is None:
            proxy = getProxy()
        try:
            res = requests.get(url,
                               headers=headers,
                               proxies={'https': f'https://{proxy}'},
                               timeout=5)
            if res.status_code != 200:
                # Non-200: assume the proxy is bad and rotate it.
                proxy = None
                retry += 1
                logger.error("Load page failed!")
                continue
            data = decode_content(res)
            return data
        except requests.RequestException:
            # Narrowed from a bare except: only network/HTTP errors should
            # rotate the proxy; programming errors (and KeyboardInterrupt)
            # must surface instead of being silently retried.
            proxy = None
            retry += 1
            logger.error("Load page failed!")
    return None
def init_table_daily(self):
    """Create the ``daily`` price table if it does not already exist.

    One row per stock per trading day: date, open, high, low, close and
    traded volume (in shares).  ``(date, code)`` is unique; ``stock_id``
    references ``stock(id)`` and cascades on delete.
    Errors are logged, never raised (best-effort init).
    """
    try:
        # Columns: date / open / high / low / close / traded share count.
        sql = '''CREATE TABLE IF NOT EXISTS daily (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            date TEXT NOT NULL,
            code TEXT NOT NULL,
            name TEXT NOT NULL,
            open REAL,
            high REAL,
            low REAL,
            close REAL,
            volume REAL NOT NULL,
            stock_id INTEGER,
            UNIQUE(date, code),
            FOREIGN KEY(stock_id) REFERENCES stock(id) ON DELETE CASCADE
        );'''
        self.cursor.execute(sql)
        self.commit()
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt
        # are not swallowed; DB errors are still only logged.
        logger.error(traceback.format_exc())
    # Usage examples kept from the original author:
    # cursor.execute("SELECT * FROM {} WHERE code=?".format(stock), ('2330',))
    # cursor.execute("INSERT INTO {} VALUES(?, ?)".format(group), ('台積電', '2330'))
    # cursor.execute("UPDATE {} SET alive=? WHERE code=?".format(group), (False, '2330'))
    # cursor.execute("SELECT * FROM stock INNER JOIN daily ON stock.id = daily.stock_id WHERE stock.code = '2330';")
    # cursor.execute("DELETE from stock where id=1")
def update_stocks_raw_data_from_goodinfo():
    """Refresh the cached Goodinfo page for every stock in the list file.

    A cached page is considered stale when the quote date embedded in the
    page (gray 9pt ``nobr``) differs from today (``%m/%d``); stale or
    missing pages are re-downloaded until a non-throttled response is
    saved ('瀏覽量異常' is Goodinfo's rate-limit page).
    All failures are logged, never raised.
    """
    today = datetime.date.today()
    today = today.strftime('%m/%d')
    logger.info('today : %s' % today)
    try:
        if not os.path.exists(CONST.STOCK_LIST):
            get_stock_list_from_db()
        # Context manager: the original opened the list file and never
        # closed it.
        with open(CONST.STOCK_LIST) as fs:
            for line in fs:
                line = line.strip('\n')
                target = os.path.join(RAWDIR, str(line))
                to_be_update = False
                if os.path.exists(target):
                    with open(target, 'r') as f:
                        res = f.read()
                    if res is not None:
                        soup = BeautifulSoup(res, "lxml")
                        date = soup.find('nobr',
                                         {'style': 'font-size:9pt;color:gray;'})
                        if not date:
                            # No quote date on the page: cache is unusable.
                            to_be_update = True
                        else:
                            date = date.get_text().split()[1]
                            if date != today:
                                to_be_update = True
                    else:
                        to_be_update = True
                else:
                    to_be_update = True
                if to_be_update:
                    logger.info('%s updating...' % str(line))
                    target_url = GOODINFO_URL + str(line)
                    # Retry forever until a non-throttled page is fetched.
                    while True:
                        try:
                            res = get(target_url)
                            if res is not None and '瀏覽量異常' not in res:
                                path = os.path.join(RAWDIR, str(line))
                                with open(path, "w") as f:
                                    f.write(res)
                                break
                        except Exception:
                            # Narrowed from a bare except; keep retrying on
                            # transient errors but log them.
                            logger.error(traceback.format_exc())
                else:
                    logger.info('%s is ready' % str(line))
    except Exception:
        logger.error(traceback.format_exc())
def get_max_min_dy(stock_id, online=False):
    """Log yearly high/low/average price and dividend yield for *stock_id*.

    Reads the cached Goodinfo page under RAWDIR, or fetches it live when
    ``online`` is True (refreshing the cache).  Rows are matched year by
    year downwards from CONST.CURRENT_YEAR.
    """
    try:
        res = None
        if online:
            target_url = GOODINFO_URL + str(stock_id)
            res = get(target_url)
        else:
            target = os.path.join(RAWDIR, str(stock_id))
            if os.path.exists(target):
                with open(target, 'r') as f:
                    res = f.read()
        if res is not None:
            if online:
                # Refresh the on-disk cache with the live copy.
                path = os.path.join(RAWDIR, str(stock_id))
                with open(path, "w") as f:
                    f.write(res)
            soup = BeautifulSoup(res, "lxml")
            tbls = soup.find_all(
                'table', {
                    'class': 'solid_1_padding_4_0_tbl',
                    'width': '100%',
                    'bgcolor': '#d2d2d2'
                })
            for tbl in tbls:
                trs = tbl.find_all('tr')
                logger.info('%4s %6s %6s %6s %6s' %
                            ("年度", "最高", "最低", "平均", "殖利率"))
                valid = CONST.CURRENT_YEAR
                # Data rows start at index 4; earlier rows are headers.
                for i in range(4, len(trs)):
                    tds = trs[i].find_all('td')
                    if len(tds) >= 19:
                        year = tds[12].get_text()
                        # Renamed from max/min: don't shadow the builtins.
                        high = tds[13].get_text()
                        low = tds[14].get_text()
                        average = tds[15].get_text()
                        dy = tds[18].get_text()
                        # Only accept consecutive years counting down from
                        # the current year.
                        if year.isdigit() and int(year) == valid:
                            logger.info('%6s %8s %8s %8s %8s' %
                                        (year, high, low, average, dy))
                            valid -= 1
    except Exception:
        # Narrowed from a bare except.
        logger.error(traceback.format_exc())
def decode_content(data):
    """Best-effort decode of an HTTP response body to text.

    Runs chardet on the raw bytes; Big5-family responses are decoded as
    cp950 (a superset of Big5), low-confidence or missing guesses fall
    back to UTF-8.  Returns the decoded string, or None on failure.
    """
    txt = None
    # Detect the charset of the raw HTTP(S) response bytes with chardet.
    det = chardet.detect(data.content)
    try:
        encoding = det['encoding']
        # Trust the guess only when confidence exceeds 0.5 AND chardet
        # actually produced a name (it can legitimately return None,
        # which would crash .decode()).
        if det['confidence'] > 0.5 and encoding:
            # Case-insensitive Big5 check: chardet may report 'Big5',
            # 'big5' or 'big-5' depending on version.
            if encoding.lower() in ('big-5', 'big5'):
                # cp950 is a superset of Big5 (includes ETEN extensions).
                txt = data.content.decode('cp950')
            else:
                txt = data.content.decode(encoding)
        else:
            # Low confidence: fall back to UTF-8.
            txt = data.content.decode('utf-8')
    except Exception:
        # Narrowed from a bare except.
        logger.error(traceback.format_exc())
    return txt
def add_daily(date, info):
    """Insert one after-hours record for *date* into the ``daily`` table.

    Skips the insert when (date, code) already exists or when the code has
    no matching row in ``stock``.  Records with zero volume store only the
    volume (price columns stay NULL, since no trade happened).
    """
    # Parameterized queries throughout: the original interpolated date and
    # code into the SQL text, which breaks on quotes and is injectable.
    db.cursor.execute("SELECT * FROM daily WHERE date=? and code=?",
                      (date, info.code))
    result = db.cursor.fetchone()
    if result is not None:
        logger.error(f"The data is duplicated: {info}")
        return
    db.cursor.execute("SELECT id FROM stock WHERE code=?", (info.code,))
    result = db.cursor.fetchone()
    if result is not None:
        stock_id = result[0]
        if info.volume != 0:
            db.cursor.execute(
                "INSERT INTO daily(date, code, name, open, high, low, close, volume, stock_id) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (date, info.code, info.name, info.open, info.high, info.low,
                 info.close, info.volume, stock_id))
        else:
            # Zero volume: no trades that day, so prices are left NULL.
            db.cursor.execute(
                "INSERT INTO daily(date, code, name, volume, stock_id) VALUES(?, ?, ?, ?, ?)",
                (date, info.code, info.name, info.volume, stock_id))
        db.commit()
    else:
        logger.error(f"Cannot find stock for {info}")
def update_listed_company(url=None, static_data=None):
    # Populate the stock table from the TWSE listed-company ISIN page.
    # Fetches `url` through get() when given; otherwise parses the
    # pre-fetched HTML passed as `static_data`.
    if url is not None:
        data = get(url)
    else:
        data = static_data
    if data is not None:
        # Parse the first HTML table on the page.
        df = pd.read_html(data)[0]
        # Positional table layout (columns 0-6), per the original author:
        #    0                  1                          2        3     4      5       6
        # 0  有價證券代號及名稱  國際證券辨識號碼(ISIN Code)  上市日  市場別 產業別 CFICode 備註
        # 1  股票               股票                        股票    股票   股票   股票    股票
        # 2  1101 台泥          TW0001101004           1962/02/09  上市 水泥工業 ESVUFR  NaN
        # 3  1102 亞泥          TW0001102002           1962/06/08  上市 水泥工業 ESVUFR  NaN
        # df.columns: Int64Index([0, 1, 2, 3, 4, 5, 6], dtype='int64')
        # Debug dump kept from the original author:
        # with open('test', 'a', encoding='UTF-8') as f:
        #     f.write(df.to_string(header = False, index = False))
        for index, row in df.iterrows():
            # Keep only rows whose market type (col 3) and CFICode (col 5)
            # are in the accepted sets; add_stock() persists the row.
            if row[3] in CONST.VALID_LISTED_TYPE and row[
                    5] in CONST.VALID_CFICode:
                add_stock(row)
    else:
        logger.error(f"Cannot get data from {url}")
def update_data_by_day(date='20200514'):
    """Download the TWSE daily closing report for *date* and store it.

    The date (YYYYMMDD) was previously hard-coded; it is now a parameter
    whose default preserves the old behaviour.  Pass e.g.
    ``datetime.date.today().strftime('%Y%m%d')`` for the current day.
    Skips dates with no published data; malformed records are logged and
    skipped without aborting the batch.
    """
    url = 'https://www.twse.com.tw/exchangeReport/MI_INDEX?' + \
          'response=json&' + \
          'type=ALLBUT0999&' + \
          'date=' + str(date)
    data = get(url)
    if data is not None:
        data = json.loads(data)
        stat = data['stat']
        # 'stat' != 'OK' means the exchange has no data for that date yet.
        if stat != 'OK':
            logger.warning(f'There is no data at {date}')
            return
        # The per-security after-hours table lives in "data9"; "fields9"
        # documents its columns: [code, name, volume, trades, value,
        # open(5), high(6), low(7), close(8), ...].
        records = data['data9']
        for record in records:
            try:
                code = record[0].strip()
                name = record[1].strip()
                volume = record[2].replace(',', '').strip()
                # Renamed from `open` to avoid shadowing the builtin.
                open_price = record[5].replace(',', '').strip()
                high = record[6].replace(',', '').strip()
                low = record[7].replace(',', '').strip()
                close = record[8].replace(',', '').strip()
                info = AfterHoursInfo(code, name, volume, open_price, high,
                                      low, close)
                add_daily(date, info)
            except Exception:
                # Narrowed from a bare except; one bad record must not
                # abort the whole day's import.
                logger.error(traceback.format_exc())
    else:
        logger.error(f"Cannot get data from {url}")
def init_table_stock(self):
    """Create the ``stock`` master table and its lookup indexes if missing.

    One row per security: unique code and name, listing date/type,
    CFICode, and a ``listed`` flag defaulting to true.
    Errors are logged, never raised (best-effort init).
    """
    try:
        sql = '''CREATE TABLE IF NOT EXISTS stock (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            code TEXT NOT NULL UNIQUE,
            name TEXT NOT NULL UNIQUE,
            listed_day TEXT NOT NULL,
            listed_type INTEGER NOT NULL,
            CFICode TEXT NOT NULL,
            listed BOOLEAN DEFAULT True
        );'''
        self.cursor.execute(sql)
        # Indexes for the two lookup paths used elsewhere (by code, by name).
        sql = ("CREATE INDEX IF NOT EXISTS index_stock_code ON stock (code);")
        self.cursor.execute(sql)
        sql = ("CREATE INDEX IF NOT EXISTS index_stock_name ON stock (name);")
        self.cursor.execute(sql)
        self.commit()
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt
        # are not swallowed; DB errors are still only logged.
        logger.error(traceback.format_exc())
# Interactive console menu; loops until the user chooses 'q'/'Q', then
# closes the DB connection.  Menu entries (labels are in Chinese):
#   1) query yearly prices & dividend yield for one stock id
#   2) refresh the stock list from TWSE
#   3) refresh yearly raw price pages from Goodinfo
#   4) screen for stable low-price candidates
#   5) update today's prices
#   q) quit
while True:
    try:
        print("1) 查詢歷年股價及殖利率")
        print("2) 更新股票清單 from TWSE")
        print("3) 更新歷年股價資料 from Goodinfo")
        print("4) 篩選波動穩動的最低股價清單")
        print("5) 更新當日股價")
        print("q) 離開")
        op = input('請選取操作: ')
        if op == '1':
            # Sub-loop: keep querying stock ids until 'q' returns to the menu.
            while True:
                sid = input('請輸入股票ID (q回主選單): ')
                if sid == 'q':
                    break
                get_max_min_dy(sid)
        elif op == '2':
            update_stocks()
        elif op == '3':
            update_stocks_raw_data_from_goodinfo()
        elif op == '4':
            get_effective_tracking_list()
        elif op == '5':
            update_data_by_day()
        elif op == 'Q' or op == 'q':
            break
        else:
            print("輸入錯誤")
    except:
        # NOTE(review): bare except also traps KeyboardInterrupt, so
        # Ctrl-C does not exit the menu — confirm this is intentional.
        logger.error(traceback.format_exc())
# Always release the DB connection on the way out.
db.close()
def get_effective_tracking_list(now=True):
    """Log the ids of stocks whose yearly lows are stable and profitable.

    For each stock in the cached Goodinfo pages, the filters are:
      1. traded volume > VOLUME_THRESHOLD
      2. at least TRACKING_YEARS years of price history
      3. volatility of the yearly lows < VOLATILITY_THRESHOLD
      4. average yearly high is > PROFIT_THRESHOLD above the average low
      5. (only when ``now``) current close within READY_TO_BUY_THRESHOLD
         of the average low — i.e. near a historical entry point.
    Progress is printed in place; matches go to the logger.
    """
    TRACKING_YEARS = 5
    VOLUME_THRESHOLD = 250
    STD_THRESHOLD = 2
    PROFIT_THRESHOLD = 0.3
    READY_TO_BUY_THRESHOLD = 0.1
    VOLATILITY_THRESHOLD = 0.25
    try:
        if not os.path.exists(CONST.STOCK_LIST):
            get_stock_list_from_db()
        # Read the list once, with a context manager.  The original opened
        # the file twice — once in 'rU' mode, which was removed in
        # Python 3.11 (ValueError) — and never closed either handle.
        with open(CONST.STOCK_LIST) as fs:
            lines = fs.readlines()
        total = len(lines)
        for count, line in enumerate(lines, start=1):
            if count == total:
                percent = 100.0
                print('Searching... %s [%d/%d]' %
                      (str(percent) + '%', count, total), end='\n')
            else:
                percent = round(1.0 * count / total * 100, 2)
                print('Searching... %.02f%% [%d/%d]' %
                      (percent, count, total), end='\r')
            line = line.strip('\n')
            target = os.path.join(RAWDIR, str(line))
            # Guard clauses replace the original deeply nested ifs.
            if not os.path.exists(target):
                continue
            with open(target, 'r') as f:
                res = f.read()
            # '瀏覽量異常' marks a Goodinfo rate-limit page: no usable data.
            if res is None or '瀏覽量異常' in res:
                continue
            soup = BeautifulSoup(res, "lxml")
            volume = get_volume(soup)
            # condition 1: traded volume > 250
            if volume <= VOLUME_THRESHOLD:
                continue
            max_list, min_list = get_max_min_list(soup)
            # condition 2: at least 5 years of history
            if len(max_list) < TRACKING_YEARS:
                continue
            max_list = max_list[:TRACKING_YEARS]
            min_list = min_list[:TRACKING_YEARS]
            # (A sample-standard-deviation filter using STD_THRESHOLD was
            # tried by the original author and replaced by the volatility
            # check below.)
            volatility = (max(min_list) - min(min_list)) / np.mean(min_list)
            # condition 3: yearly lows vary little (low volatility)
            if volatility >= VOLATILITY_THRESHOLD:
                continue
            avg_max = np.mean(max_list)
            avg_min = np.mean(min_list)
            # condition 4: average high > 30% above average low
            if (avg_max - avg_min) / avg_min <= PROFIT_THRESHOLD:
                continue
            if now:
                close = get_close(soup)
                # Entry filter: current close no more than 10% above the
                # average low.
                if (close - avg_min) / avg_min < READY_TO_BUY_THRESHOLD:
                    logger.info('%s' % line)
            else:
                logger.info('%s' % line)
    except Exception:
        # Narrowed from a bare except.
        logger.error(traceback.format_exc())