def parseWsjHistoryHtml(body, stockId):
    """Turn a WSJ historical-prices HTML page into a list of history records.

    The second ``table[class=cr_dataTable]`` element of the page is scanned
    row by row.  Rows that do not have exactly six ``<td>`` cells are ignored.

    Args:
        body: HTML markup handed to ``pq``.
        stockId: Identifier forwarded unchanged to
            ``stockDataUtil.filterHistoryData``.

    Returns:
        A list with every value returned by
        ``stockDataUtil.filterHistoryData`` that is not equal to ``{}``.
    """
    document = pq(body)
    # The price history is taken from the second table carrying this class.
    history_table = document("table[class=cr_dataTable]").eq(1)
    rows = history_table.filter("table tbody tr")

    records = []
    for row_index in range(rows.length):
        cells = rows.eq(row_index).children("td")
        if cells.length != 6:
            continue

        raw_date = cells.eq(0).text()  # date format "Jul 23, 2020"
        # Columns 1..5 are open, high, low, close, volume; drop the
        # thousands separators from each of them.
        open_price, high_price, low_price, close_price, volume = [
            cells.eq(column).text().replace(",", "") for column in range(1, 6)
        ]
        iso_date = parse(raw_date).strftime('%Y-%m-%d')

        record = stockDataUtil.filterHistoryData(
            stockId, iso_date, open_price, high_price, low_price,
            close_price, volume)
        # NOTE(review): presumably filterHistoryData returns {} for rows it
        # rejects -- confirm against stockDataUtil.
        if record != {}:
            records.append(record)
    return records
def parseGoogleHistoryHtml(body, stockId):
    """Turn a Google Finance historical-prices page into history records.

    Rows of ``table.gf-table.historical_price`` are scanned starting at
    index 1 (index 0 is never read).  Rows that do not have exactly six
    ``<td>`` cells are ignored.

    Args:
        body: HTML markup handed to ``pq``.
        stockId: Identifier forwarded unchanged to
            ``stockDataUtil.filterHistoryData``.

    Returns:
        A list with every value returned by
        ``stockDataUtil.filterHistoryData`` that is not equal to ``{}``.
    """
    document = pq(body)
    history_table = document("table.gf-table.historical_price")
    rows = history_table.filter("table tr")

    records = []
    for row_index in range(1, rows.length):
        cells = rows.eq(row_index).children("td")
        if cells.length != 6:
            continue

        raw_date = cells.eq(0).text()
        # Columns 1..5 are open, high, low, close, volume; drop the
        # thousands separators from each of them.
        open_price, high_price, low_price, close_price, volume = [
            cells.eq(column).text().replace(",", "") for column in range(1, 6)
        ]
        iso_date = parse(raw_date).strftime("%Y-%m-%d")

        record = stockDataUtil.filterHistoryData(
            stockId, iso_date, open_price, high_price, low_price,
            close_price, volume)
        # NOTE(review): presumably filterHistoryData returns {} for rows it
        # rejects -- confirm against stockDataUtil.
        if record != {}:
            records.append(record)
    return records
def getHistorical_twse_old(stockId, marketType, startDate, endDate):
    """Fetch daily history for one stock from the legacy TWSE STOCK_DAYMAIN form.

    One POST request is sent for every calendar month from the month of
    ``startDate`` through the month of ``endDate`` (both inclusive), and each
    ``<tbody>`` row of every response is passed to
    ``stockDataUtil.filterHistoryData``.

    Args:
        stockId: Stock code, sent as the ``CO_ID`` form field.
        marketType: Not used by this function.
        startDate: Date string understood by ``parse``; only its year and
            month are used.
        endDate: Date string understood by ``parse``; only its year and
            month are used.

    Returns:
        A list with every value returned by
        ``stockDataUtil.filterHistoryData`` that is not equal to ``{}``.
        The list is empty when ``endDate`` falls in an earlier month than
        ``startDate``.
    """
    twseHistoryUrl = "http://www.twse.com.tw/ch/trading/exchange/STOCK_DAY/STOCK_DAYMAIN.php"
    print(twseHistoryUrl)

    start = parse(startDate)
    end = parse(endDate)
    # Count months on a single running index so that a range crossing a year
    # boundary (e.g. 2019-11 .. 2020-02) visits every month exactly once.
    # Looping years and months independently skipped months in that case.
    first_month_index = start.year * 12 + (start.month - 1)
    last_month_index = end.year * 12 + (end.month - 1)

    headers = {
        'User-Agent':
        'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.73 Safari/537.36'
    }

    result = []
    for month_index in range(first_month_index, last_month_index + 1):
        qryYear, month_offset = divmod(month_index, 12)
        qryMonth = month_offset + 1
        payload = {
            "download": "",
            "query_year": str(qryYear),
            "query_month": str(qryMonth),
            "CO_ID": stockId,
            "query-button": "查詢",
        }
        res = requests.post(twseHistoryUrl, data=payload, headers=headers)
        doc = pq(res.text)
        rows = doc("table").filter("table tbody tr")

        for index in range(rows.length):
            cells = rows.eq(index).children("td")
            strDate = cells.eq(0).text()  # date, format 104/1/1
            strVolume = cells.eq(1).text().replace(",", "")
            # Columns 3..6 are open, high, low, close.
            strOpen, strHigh, strLow, strClose = [
                cells.eq(column).text().replace(",", "")
                for column in range(3, 7)
            ]
            # NOTE(review): twYear2StandardYear presumably converts the
            # ROC-calendar year to a Gregorian one -- confirm in stockDataUtil.
            strDate1 = parse(
                stockDataUtil.twYear2StandardYear(strDate)).strftime("%Y-%m-%d")
            # Volume is scaled down by 1000 and truncated to an integer.
            strVolume1 = str(int(int(strVolume) / 1000))
            data = stockDataUtil.filterHistoryData(stockId, strDate1, strOpen,
                                                   strHigh, strLow, strClose,
                                                   strVolume1)
            if data != {}:
                result.append(data)
    return result
def getHistorical_twse(stockId, marketType, startDate, endDate):
    """Fetch daily history for one stock from the TWSE STOCK_DAY CSV endpoint.

    One request is sent for every calendar month from the month of
    ``startDate`` through the month of ``endDate`` (both inclusive).  Each
    response is decoded as Big5, parsed as CSV, and every 10-column row whose
    first field passes ``stockDataUtil.is_date`` is handed to
    ``stockDataUtil.filterHistoryData``.

    Args:
        stockId: Stock code, appended to the URL as ``stockNo``.
        marketType: Not used by this function.
        startDate: Date string understood by ``parse``; only its year and
            month are used.
        endDate: Date string understood by ``parse``; only its year and
            month are used.

    Returns:
        A list with every value returned by
        ``stockDataUtil.filterHistoryData`` that is not equal to ``{}``.
        The list is empty when ``endDate`` falls in an earlier month than
        ``startDate``.
    """
    from fake_useragent import UserAgent

    ua = UserAgent()

    start = parse(startDate)
    end = parse(endDate)
    # Count months on a single running index so that a range crossing a year
    # boundary (e.g. 2019-11 .. 2020-02) visits every month exactly once.
    # Looping years and months independently skipped months in that case.
    first_month_index = start.year * 12 + (start.month - 1)
    last_month_index = end.year * 12 + (end.month - 1)

    result = []
    for month_index in range(first_month_index, last_month_index + 1):
        qryYear, month_offset = divmod(month_index, 12)
        qryMonth = month_offset + 1

        # The endpoint is addressed by the first day of the month (YYYYMM01).
        date1 = "{}{:02d}01".format(qryYear, qryMonth)
        twseHistoryUrl = (
            "http://www.twse.com.tw/exchangeReport/STOCK_DAY?response=csv" +
            "&date=" + date1 + "&stockNo=" + stockId)
        print(twseHistoryUrl)

        headers = {'user-agent': ua.random}
        res = requests.post(twseHistoryUrl, headers=headers)
        time.sleep(5)  # sleep=3 will fail, sleep=5 OK

        decoded_content = res.content.decode('Big5')
        # Drop the first two and the last five lines of the response before
        # handing the remainder to the CSV parser.
        lines = decoded_content.splitlines()[2:-5]

        for row in csv.reader(lines):
            if len(row) != 10:
                continue
            strDate = row[0]  # date format 109/07/21
            if not stockDataUtil.is_date(strDate):
                continue
            strOpen = row[3].replace(",", "")
            strHigh = row[4].replace(",", "")
            strLow = row[5].replace(",", "")
            strClose = row[6].replace(",", "")
            strVolume = row[1].replace(",", "")

            # NOTE(review): twYear2StandardYear presumably converts the
            # ROC-calendar year to a Gregorian one -- confirm in stockDataUtil.
            strDate1 = stockDataUtil.twYear2StandardYear(strDate)
            strVolume1 = str(int(float(strVolume)))
            data = stockDataUtil.filterHistoryData(stockId, strDate1, strOpen,
                                                   strHigh, strLow, strClose,
                                                   strVolume1)
            if data != {}:
                result.append(data)
    return result
def getHistorical_sina(stockId, marketType, startDate, endDate):
    """Fetch daily history for one Hong Kong stock from Sina Finance.

    The Sina history page is queried by ``year`` and ``season`` form fields.
    One POST request is sent for every quarter from the quarter containing
    ``startDate`` through the quarter containing ``endDate`` (both
    inclusive), and every table row after the first of each response is
    handed to ``stockDataUtil.filterHistoryData``.

    Args:
        stockId: Stock code, embedded in the page URL.
        marketType: Not used by this function.
        startDate: Date string understood by ``parse``; only its year and
            month are used.
        endDate: Date string understood by ``parse``; only its year and
            month are used.

    Returns:
        A list with every value returned by
        ``stockDataUtil.filterHistoryData`` that is not equal to ``{}``.
        The list is empty when ``endDate`` falls in an earlier quarter than
        ``startDate``.
    """
    from fake_useragent import UserAgent

    ua = UserAgent()
    sinaHistoryUrl = "http://stock.finance.sina.com.cn/hkstock/history/" + stockId + ".html"
    print(sinaHistoryUrl)

    start = parse(startDate)
    end = parse(endDate)
    # Count quarters on a single running index so the loop covers exactly
    # the span start-quarter .. end-quarter, including across year
    # boundaries.  The previous loop always began at season 1 and derived
    # its upper bound from the start month, so it ignored endDate's month.
    first_quarter_index = start.year * 4 + (start.month - 1) // 3
    last_quarter_index = end.year * 4 + (end.month - 1) // 3

    result = []
    for quarter_index in range(first_quarter_index, last_quarter_index + 1):
        qryYear, quarter_offset = divmod(quarter_index, 4)
        qrySeason = quarter_offset + 1
        payload = {"year": str(qryYear), "season": str(qrySeason)}

        headers = {'user-agent': ua.random}
        res = requests.post(sinaHistoryUrl, data=payload, headers=headers)
        doc = pq(res.text)
        time.sleep(1)

        rows = doc("table").filter("table tbody tr")
        # Index 0 is the column-header row, so data starts at index 1.
        for index in range(1, rows.length):
            cells = rows.eq(index).children("td")
            strDate = cells.eq(0).text()
            strClose = cells.eq(1).text().replace(",", "")
            strVolume = cells.eq(4).text().replace(",", "")
            # Columns 6..8 are open, high, low.
            strOpen, strHigh, strLow = [
                cells.eq(column).text().replace(",", "")
                for column in range(6, 9)
            ]

            strDate1 = parse(strDate).strftime("%Y-%m-%d")
            strVolume1 = str(int(float(strVolume)))
            data = stockDataUtil.filterHistoryData(stockId, strDate1, strOpen,
                                                   strHigh, strLow, strClose,
                                                   strVolume1)
            if data != {}:
                result.append(data)
    return result
def getHistorical_tpex(stockId, marketType, startDate, endDate):
    """Fetch daily history for one stock from the TPEx st43 CSV download.

    One GET request is sent for every calendar month from the month of
    ``startDate`` through the month of ``endDate`` (both inclusive).  Each
    response is decoded as Big5, parsed as CSV, and every 9-column row whose
    first field passes ``stockDataUtil.is_date`` is handed to
    ``stockDataUtil.filterHistoryData``.

    Args:
        stockId: Stock code, appended to the URL as ``stkno``.
        marketType: Not used by this function.
        startDate: Date string understood by ``parse``; only its year and
            month are used.
        endDate: Date string understood by ``parse``; only its year and
            month are used.

    Returns:
        A list with every value returned by
        ``stockDataUtil.filterHistoryData`` that is not equal to ``{}``.
        The list is empty when ``endDate`` falls in an earlier month than
        ``startDate``.
    """
    from fake_useragent import UserAgent

    ua = UserAgent()

    start = parse(startDate)
    end = parse(endDate)
    # Count months on a single running index so that a range crossing a year
    # boundary (e.g. 2019-11 .. 2020-02) visits every month exactly once.
    # Looping years and months independently skipped months in that case.
    first_month_index = start.year * 12 + (start.month - 1)
    last_month_index = end.year * 12 + (end.month - 1)

    result = []
    for month_index in range(first_month_index, last_month_index + 1):
        qryYear, month_offset = divmod(month_index, 12)
        qryMonth = month_offset + 1

        # The "d" parameter is built as <year - 1911>/<month>.
        tpexHistoryUrl = "http://www.tpex.org.tw/web/stock/aftertrading/daily_trading_info/st43_download.php?l=zh-tw&d=" + \
            str(qryYear - 1911) + "/" + str(qryMonth) + "&stkno=" + stockId + "&s=0,asc,0"
        headers = {'user-agent': ua.random}
        print(tpexHistoryUrl)
        res = requests.get(tpexHistoryUrl, headers=headers)
        time.sleep(5)

        decoded_content = res.content.decode('Big5')
        # Drop the first five lines and the last line.  Slicing (instead of
        # del + pop) keeps a response of five lines or fewer from raising
        # IndexError.
        lines = decoded_content.splitlines()[5:-1]

        for row in csv.reader(lines):
            if len(row) != 9:
                continue
            strDate = row[0].replace("*", "")
            if not stockDataUtil.is_date(strDate):
                continue
            strOpen = row[3].replace(",", "")
            strHigh = row[4].replace(",", "")
            strLow = row[5].replace(",", "")
            strClose = row[6].replace(",", "")
            strVolume = row[1].replace(",", "")

            # NOTE(review): twYear2StandardYear presumably converts the
            # ROC-calendar year to a Gregorian one -- confirm in stockDataUtil.
            strDate1 = parse(
                stockDataUtil.twYear2StandardYear(strDate)).strftime("%Y-%m-%d")
            strVolume1 = str(int(float(strVolume)))
            data = stockDataUtil.filterHistoryData(stockId, strDate1, strOpen,
                                                   strHigh, strLow, strClose,
                                                   strVolume1)
            if data != {}:
                result.append(data)
    return result