Example #1
0
def parseWsjHistoryHtml(body, stockId):
    """Extract daily price records from a WSJ history page.

    The rows are taken from the second ``table[class=cr_dataTable]`` element
    in ``body``.  Only rows with exactly six ``<td>`` cells are used; the
    cells are read as date, open, high, low, close and volume, in that order.

    Args:
        body: HTML text handed to ``pq``.
        stockId: Identifier forwarded unchanged to
            ``stockDataUtil.filterHistoryData``.

    Returns:
        list: Every ``filterHistoryData`` result that is not equal to ``{}``,
        in page order.
    """
    document = pq(body)
    rows = document("table[class=cr_dataTable]").eq(1).filter(
        "table tbody tr")
    records = []
    for position in range(rows.length):
        cells = rows.eq(position).children("td")
        if cells.length != 6:
            continue
        rawDate = cells.eq(0).text()  # date format "Jul 23, 2020"
        # Columns 1..5 are open, high, low, close, volume; commas are removed
        # from each of them.
        numbers = [
            cells.eq(column).text().replace(",", "") for column in range(1, 6)
        ]
        isoDate = parse(rawDate).strftime('%Y-%m-%d')
        record = stockDataUtil.filterHistoryData(stockId, isoDate, *numbers)
        if record != {}:
            records.append(record)
    return records
Example #2
0
def parseGoogleHistoryHtml(body, stockId):
    """Extract daily price records from a ``gf-table historical_price`` table.

    Rows are selected from ``table.gf-table.historical_price`` in ``body``.
    The row at index 0 is never read, and any later row that does not have
    exactly six ``<td>`` cells is ignored.  The six cells are read as date,
    open, high, low, close and volume, in that order.

    Args:
        body: HTML text handed to ``pq``.
        stockId: Identifier forwarded unchanged to
            ``stockDataUtil.filterHistoryData``.

    Returns:
        list: Every ``filterHistoryData`` result that is not equal to ``{}``,
        in page order.
    """
    rows = pq(body)("table.gf-table.historical_price").filter("table tr")
    collected = []
    position = 1  # row 0 is skipped
    while position < rows.length:
        cells = rows.eq(position).children("td")
        position += 1
        if cells.length != 6:
            continue

        def cellNumber(column):
            # Cell text with the commas removed.
            return cells.eq(column).text().replace(",", "")

        rawDate = cells.eq(0).text()
        openPrice = cellNumber(1)
        highPrice = cellNumber(2)
        lowPrice = cellNumber(3)
        closePrice = cellNumber(4)
        volume = cellNumber(5)
        isoDate = parse(rawDate).strftime("%Y-%m-%d")
        record = stockDataUtil.filterHistoryData(stockId, isoDate, openPrice,
                                                 highPrice, lowPrice,
                                                 closePrice, volume)
        if record != {}:
            collected.append(record)
    return collected
Example #3
0
def getHistorical_twse_old(stockId, marketType, startDate, endDate):
    """Fetch daily history for ``stockId`` from the TWSE STOCK_DAYMAIN page.

    One POST is issued for every calendar month from the month of
    ``startDate`` through the month of ``endDate`` (inclusive).  Every
    ``tbody`` row of the returned HTML is read and handed to
    ``stockDataUtil.filterHistoryData``.

    Args:
        stockId: Stock code sent as the ``CO_ID`` form field.
        marketType: Not used by this function; kept so the signature matches
            the other ``getHistorical_*`` functions.
        startDate: Date string accepted by ``parse``.
        endDate: Date string accepted by ``parse``.

    Returns:
        list: Every ``filterHistoryData`` result that is not equal to ``{}``.
        Empty when ``endDate`` falls in an earlier month than ``startDate``.
    """
    twseHistoryUrl = "http://www.twse.com.tw/ch/trading/exchange/STOCK_DAY/STOCK_DAYMAIN.php"
    print(twseHistoryUrl)
    start = parse(startDate)
    end = parse(endDate)
    # Number the months on one axis (year * 12 + zero-based month) so a range
    # that crosses a year boundary visits each month exactly once.  Looping
    # over years and, inside that, over range(start month, end month + 1)
    # skipped months and produced nothing when the end month was the smaller
    # of the two.
    firstMonth = start.year * 12 + start.month - 1
    lastMonth = end.year * 12 + end.month - 1
    headers = {
        'User-Agent':
        'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.73 Safari/537.36'
    }
    result = []
    for monthIndex in range(firstMonth, lastMonth + 1):
        qryYear, monthOffset = divmod(monthIndex, 12)
        payload = {
            "download": "",
            "query_year": str(qryYear),
            "query_month": str(monthOffset + 1),
            "CO_ID": stockId,
            "query-button": "查詢",
        }
        res = requests.post(twseHistoryUrl, data=payload, headers=headers)
        rows = pq(res.text)("table").filter("table tbody tr")
        for index in range(rows.length):
            cells = rows.eq(index).children("td")
            strDate = cells.eq(0).text()  # date, format 104/1/1
            strOpen = cells.eq(3).text().replace(",", "")
            strHigh = cells.eq(4).text().replace(",", "")
            strLow = cells.eq(5).text().replace(",", "")
            strClose = cells.eq(6).text().replace(",", "")
            strVolume = cells.eq(1).text().replace(",", "")
            strDate1 = parse(
                stockDataUtil.twYear2StandardYear(strDate)).strftime(
                    "%Y-%m-%d")
            # The volume is divided by 1000 before it is stored
            # (presumably shares -> lots; confirm against the data source).
            strVolume1 = str(int(int(strVolume) / 1000))
            data = stockDataUtil.filterHistoryData(stockId, strDate1, strOpen,
                                                   strHigh, strLow, strClose,
                                                   strVolume1)
            if data != {}:
                result.append(data)
    return result
Example #4
0
def getHistorical_twse(stockId, marketType, startDate, endDate):
    """Fetch daily history for ``stockId`` from the TWSE STOCK_DAY CSV report.

    One request is issued for every calendar month from the month of
    ``startDate`` through the month of ``endDate`` (inclusive), with a
    five-second pause after each request.  The response is decoded as Big5,
    its first two and last five lines are discarded, and the rest is parsed
    as CSV.  Only rows with exactly ten columns whose first column passes
    ``stockDataUtil.is_date`` are used.

    Args:
        stockId: Stock code placed in the ``stockNo`` query parameter.
        marketType: Not used by this function; kept so the signature matches
            the other ``getHistorical_*`` functions.
        startDate: Date string accepted by ``parse``.
        endDate: Date string accepted by ``parse``.

    Returns:
        list: Every ``filterHistoryData`` result that is not equal to ``{}``.
        Empty when ``endDate`` falls in an earlier month than ``startDate``.
    """
    from fake_useragent import UserAgent
    ua = UserAgent()
    start = parse(startDate)
    end = parse(endDate)
    # Number the months on one axis (year * 12 + zero-based month) so a range
    # that crosses a year boundary visits each month exactly once.  Looping
    # over years and, inside that, over range(start month, end month + 1)
    # skipped months and produced nothing when the end month was the smaller
    # of the two.
    firstMonth = start.year * 12 + start.month - 1
    lastMonth = end.year * 12 + end.month - 1
    result = []
    for monthIndex in range(firstMonth, lastMonth + 1):
        qryYear, monthOffset = divmod(monthIndex, 12)
        # The report is requested by the first day of the month, as YYYYMM01.
        date1 = str(qryYear) + '{:0>2}'.format(str(monthOffset + 1)) + "01"
        twseHistoryUrl = (
            "http://www.twse.com.tw/exchangeReport/STOCK_DAY?response=csv" +
            "&date=" + date1 + "&stockNo=" + stockId)
        print(twseHistoryUrl)
        headers = {'user-agent': ua.random}
        res = requests.post(twseHistoryUrl, headers=headers)
        time.sleep(5)  # sleep=3 will fail, sleep=5 OK
        lines = res.content.decode('Big5').splitlines()
        # Drop the two leading and five trailing lines around the data rows.
        for row in csv.reader(lines[2:-5]):
            if len(row) != 10:
                continue
            strDate = row[0]  # date format 109/07/21
            if not stockDataUtil.is_date(strDate):
                continue
            strOpen = row[3].replace(",", "")
            strHigh = row[4].replace(",", "")
            strLow = row[5].replace(",", "")
            strClose = row[6].replace(",", "")
            strVolume = row[1].replace(",", "")
            # NOTE(review): the sibling functions additionally pass this value
            # through parse(...).strftime("%Y-%m-%d"); left as-is here --
            # confirm which date format filterHistoryData expects.
            strDate1 = stockDataUtil.twYear2StandardYear(strDate)
            strVolume1 = str(int(float(strVolume)))
            data = stockDataUtil.filterHistoryData(stockId, strDate1, strOpen,
                                                   strHigh, strLow, strClose,
                                                   strVolume1)
            if data != {}:
                result.append(data)
    return result
Example #5
0
def getHistorical_sina(stockId, marketType, startDate, endDate):
    """Fetch daily history for ``stockId`` from the Sina HK stock history page.

    The page is queried by year and season (quarter, 1-4).  One POST is
    issued for every quarter from the quarter containing ``startDate``
    through the quarter containing ``endDate`` (inclusive), with a one-second
    pause after each request.  The first ``tbody`` row of each response is
    skipped as the column header; every other row is handed to
    ``stockDataUtil.filterHistoryData``.

    Args:
        stockId: Stock code inserted into the page URL.
        marketType: Not used by this function; kept so the signature matches
            the other ``getHistorical_*`` functions.
        startDate: Date string accepted by ``parse``.
        endDate: Date string accepted by ``parse``.

    Returns:
        list: Every ``filterHistoryData`` result that is not equal to ``{}``.
        Empty when ``endDate`` falls in an earlier quarter than ``startDate``.
    """
    from fake_useragent import UserAgent
    ua = UserAgent()
    sinaHistoryUrl = "http://stock.finance.sina.com.cn/hkstock/history/" + stockId + ".html"
    print(sinaHistoryUrl)
    start = parse(startDate)
    end = parse(endDate)
    # Number the quarters on one axis (year * 4 + zero-based quarter) and walk
    # from the start date's quarter to the end date's quarter.  The previous
    # loop bounded the season by the START month for every year, so quarters
    # after the start quarter were never requested.
    firstQuarter = start.year * 4 + (start.month - 1) // 3
    lastQuarter = end.year * 4 + (end.month - 1) // 3
    result = []
    for quarterIndex in range(firstQuarter, lastQuarter + 1):
        qryYear, quarterOffset = divmod(quarterIndex, 4)
        payload = {"year": str(qryYear), "season": str(quarterOffset + 1)}
        headers = {'user-agent': ua.random}
        res = requests.post(sinaHistoryUrl, data=payload, headers=headers)
        rows = pq(res.text)("table").filter("table tbody tr")
        time.sleep(1)
        for index in range(1, rows.length):  # index 0 is the column header
            cells = rows.eq(index).children("td")
            strDate = cells.eq(0).text()
            strOpen = cells.eq(6).text().replace(",", "")
            strHigh = cells.eq(7).text().replace(",", "")
            strLow = cells.eq(8).text().replace(",", "")
            strClose = cells.eq(1).text().replace(",", "")
            strVolume = cells.eq(4).text().replace(",", "")
            strDate1 = parse(strDate).strftime("%Y-%m-%d")
            strVolume1 = str(int(float(strVolume)))
            data = stockDataUtil.filterHistoryData(stockId, strDate1, strOpen,
                                                   strHigh, strLow, strClose,
                                                   strVolume1)
            if data != {}:
                result.append(data)
    return result
Example #6
0
def getHistorical_tpex(stockId, marketType, startDate, endDate):
    """Fetch daily history for ``stockId`` from the TPEx st43 CSV download.

    One GET is issued for every calendar month from the month of
    ``startDate`` through the month of ``endDate`` (inclusive), with a
    five-second pause after each request.  The response is decoded as Big5,
    its first five lines and last line are discarded, and the rest is parsed
    as CSV.  Only rows with exactly nine columns whose first column (with any
    ``*`` removed) passes ``stockDataUtil.is_date`` are used.

    Args:
        stockId: Stock code placed in the ``stkno`` query parameter.
        marketType: Not used by this function; kept so the signature matches
            the other ``getHistorical_*`` functions.
        startDate: Date string accepted by ``parse``.
        endDate: Date string accepted by ``parse``.

    Returns:
        list: Every ``filterHistoryData`` result that is not equal to ``{}``.
        Empty when ``endDate`` falls in an earlier month than ``startDate``.
    """
    from fake_useragent import UserAgent
    ua = UserAgent()
    start = parse(startDate)
    end = parse(endDate)
    # Number the months on one axis (year * 12 + zero-based month) so a range
    # that crosses a year boundary visits each month exactly once.  Looping
    # over years and, inside that, over range(start month, end month + 1)
    # skipped months and produced nothing when the end month was the smaller
    # of the two.
    firstMonth = start.year * 12 + start.month - 1
    lastMonth = end.year * 12 + end.month - 1
    result = []
    for monthIndex in range(firstMonth, lastMonth + 1):
        qryYear, monthOffset = divmod(monthIndex, 12)
        qryMonth = monthOffset + 1
        # The "d" parameter carries the year minus 1911 followed by the month
        # (presumably the ROC calendar year -- confirm against the endpoint).
        tpexHistoryUrl = (
            "http://www.tpex.org.tw/web/stock/aftertrading/daily_trading_info/st43_download.php?l=zh-tw&d="
            + str(qryYear - 1911) + "/" + str(qryMonth) + "&stkno=" +
            stockId + "&s=0,asc,0")
        headers = {'user-agent': ua.random}
        print(tpexHistoryUrl)
        res = requests.get(tpexHistoryUrl, headers=headers)
        time.sleep(5)
        lines = res.content.decode('Big5').splitlines()
        # Drop the five leading lines and the last line.  Slicing, unlike
        # del + pop(), does not raise when the response is that short.
        for row in csv.reader(lines[5:-1]):
            if len(row) != 9:
                continue
            strDate = row[0].replace("*", "")
            if not stockDataUtil.is_date(strDate):
                continue
            strOpen = row[3].replace(",", "")
            strHigh = row[4].replace(",", "")
            strLow = row[5].replace(",", "")
            strClose = row[6].replace(",", "")
            strVolume = row[1].replace(",", "")
            strDate1 = parse(
                stockDataUtil.twYear2StandardYear(strDate)).strftime(
                    "%Y-%m-%d")
            strVolume1 = str(int(float(strVolume)))
            data = stockDataUtil.filterHistoryData(stockId, strDate1, strOpen,
                                                   strHigh, strLow, strClose,
                                                   strVolume1)
            if data != {}:
                result.append(data)
    return result