def CrawlLowestStockPrice(self):
    try:
        webDriver = self.webCrawler.GetDriver()
        highLowPriceTable = webDriver.find_element_by_class_name(DefineManager.STOCK_HIGH_LOW_PRICE_INFO_TABLE)
        lowPriceTableRow = highLowPriceTable.find_elements_by_tag_name(DefineManager.TAG_TR)[DefineManager.LOWEST_PRICE_SAVED_ROW_POINT]
        lowPriceTableCol = lowPriceTableRow.find_elements_by_tag_name(DefineManager.TAG_TD)[DefineManager.LOWEST_PRICE_SAVED_COL_POINT]
        lowPrice = lowPriceTableCol.find_element_by_class_name(DefineManager.STOCK_LOW_NUMBER_CLASS_NAME)
        lowPriceNumberElements = lowPrice.find_elements_by_tag_name(DefineManager.TAG_SPAN)

        # The price is split across multiple <span> elements, so concatenate their text.
        lowPriceStr = ""
        for indexOfSpanNumber in lowPriceNumberElements:
            lowPriceStr = lowPriceStr + indexOfSpanNumber.text

        LogManager.PrintLogMessage("CrawlBasicInfo", "CrawlLowestStockPrice",
                                   "crawl lowest stock price successfully: " + lowPriceStr,
                                   DefineManager.LOG_LEVEL_INFO)
        return lowPriceStr
    except:
        LogManager.PrintLogMessage("CrawlBasicInfo", "CrawlLowestStockPrice",
                                   "crawl lowest stock price failed", DefineManager.LOG_LEVEL_ERROR)
        return None
def CrawlBestYearPrice(self):
    try:
        webDriver = self.webCrawler.GetDriver()
        sideTab = webDriver.find_element_by_class_name(DefineManager.STOCK_SIDE_TAB_CLASS_NAME)
        investmentOpinionSection = sideTab.find_element_by_class_name(DefineManager.STOCK_INVESTMENT_OPINION_CLASS_NAME)
        investmentOpinionRow = investmentOpinionSection.find_elements_by_tag_name(DefineManager.TAG_TR)[DefineManager.BEST_PRICE_OF_THE_YEAR_ROW_POINT]
        investmentOpinionCols = investmentOpinionRow.find_elements_by_tag_name(DefineManager.TAG_EM)
        bestPriceOfTheYear = investmentOpinionCols[DefineManager.BEST_PRICE_OF_THE_YEAR_COL_POINT].text

        LogManager.PrintLogMessage("CrawlBasicInfo", "CrawlBestYearPrice",
                                   "crawl best price of the year successfully: " + bestPriceOfTheYear,
                                   DefineManager.LOG_LEVEL_INFO)
        return bestPriceOfTheYear
    except:
        LogManager.PrintLogMessage("CrawlBasicInfo", "CrawlBestYearPrice",
                                   "crawl best price of the year failed", DefineManager.LOG_LEVEL_ERROR)
        return None
def StartCrawl(self):
    LogManager.PrintLogMessage("CrawlRoutineManager", "StartCrawl", "crawl data", DefineManager.LOG_LEVEL_INFO)
    crawlDataDic = {}

    # Basic information from the stock's main page.
    crawlBasicInfo = CrawlBasicInfo.CrawlBasicInfo(self.webCrawler, self.targetUrl)
    crawlDataDic["Name"] = crawlBasicInfo.CrawlCompanyName() or ""
    crawlDataDic["Code"] = crawlBasicInfo.CrawlCompanyStockCode() or ""
    crawlDataDic["Price"] = crawlBasicInfo.CrawlStockPrice() or ""
    crawlDataDic["D_PRH"] = crawlBasicInfo.CrawlHighestStockPrice() or ""
    crawlDataDic["D_PRL"] = crawlBasicInfo.CrawlLowestStockPrice() or ""
    crawlDataDic["Y_PRH"] = crawlBasicInfo.CrawlBestYearPrice() or ""
    crawlDataDic["Y_PRL"] = crawlBasicInfo.CrawlWorstYearPrice() or ""
    crawlDataDic["D_IV"] = crawlBasicInfo.CrawlDividendYield() or ""
    crawlDataDic["Change"] = crawlBasicInfo.CrawlPriceChangedPercent() or ""
    crawlDataDic["Value"] = crawlBasicInfo.CrawlMarketCapitalization() or ""
    crawlDataDic["Beta"] = crawlBasicInfo.CrawlYearBeta() or ""
    crawlDataDic["PER"] = crawlBasicInfo.CrawlPER() or ""
    crawlDataDic["PBR"] = crawlBasicInfo.CrawlPBR() or ""
    crawlDataDic["EPS"] = crawlBasicInfo.CrawlEPS() or ""

    # Detailed financials from the company-info page.
    crawlDetailInfo = CrawlDetailInfo.CrawlDetailInfo(self.webCrawler, self.targetDetailUrl)
    crawlDataDic["SALEQ2"] = crawlDetailInfo.Crawl3YearsBeforeSale() or ""
    crawlDataDic["SALEQ1"] = crawlDetailInfo.Crawl2YearsBeforeSale() or ""
    crawlDataDic["SALEQ0"] = crawlDetailInfo.Crawl1YearsBeforeSale() or ""
    crawlDataDic["NIQ2"] = crawlDetailInfo.Crawl3YearsBeforeNetIncome() or ""
    crawlDataDic["NIQ1"] = crawlDetailInfo.Crawl2YearsBeforeNetIncome() or ""
    crawlDataDic["NIQ0"] = crawlDetailInfo.Crawl1YearsBeforeNetIncome() or ""
    crawlDataDic["ACT"] = crawlDetailInfo.CrawlActQ3() or ""
    crawlDataDic["DPT"] = crawlDetailInfo.CrawlDptQ3() or ""
    crawlDataDic["CAP"] = crawlDetailInfo.CrawlCapQ3() or ""

    for key in crawlDataDic:
        LogManager.PrintLogMessage("CrawlRoutineManager", "StartCrawl", key + ": " + crawlDataDic[key],
                                   DefineManager.LOG_LEVEL_DEBUG)

    self.crawlDataArray.append(crawlDataDic)
    LogManager.PrintLogMessage("CrawlRoutineManager", "StartCrawl",
                               "saved crawl data size: " + str(len(self.crawlDataArray)),
                               DefineManager.LOG_LEVEL_INFO)
def RunCrawling(self):
    LogManager.PrintLogMessage("CrawlRoutineManager", "RunCrawling",
                               "running company stock price crawling", DefineManager.LOG_LEVEL_INFO)
    for companyCode in self.companyCodes:
        self.targetUrl = "http://finance.naver.com/item/main.nhn?code=" + companyCode
        self.targetDetailUrl = "http://finance.naver.com/item/coinfo.nhn?code=" + companyCode + "&target=finsum_more"
        LogManager.PrintLogMessage("CrawlRoutineManager", "RunCrawling",
                                   "start crawling company: " + companyCode, DefineManager.LOG_LEVEL_INFO)
        self.StartCrawl()
def CloseDriver(self):
    LogManager.PrintLogMessage("WebCrawler", "CloseDriver", "close chrome browser", DefineManager.LOG_LEVEL_INFO)
    try:
        self.driver.quit()
        self.driverStatus = False
    except:
        LogManager.PrintLogMessage("WebCrawler", "CloseDriver", "cannot close chrome browser",
                                   DefineManager.LOG_LEVEL_ERROR)
        self.driverStatus = True
def TakePicture(self, url):
    if not self.driverStatus:
        LogManager.PrintLogMessage("WebCrawler", "TakePicture", "chrome browser not working",
                                   DefineManager.LOG_LEVEL_WARN)
        return False
    if not self.SetDriverUrl(url):
        return False
    LogManager.PrintLogMessage("WebCrawler", "TakePicture", "taking screenshot of url: " + url,
                               DefineManager.LOG_LEVEL_INFO)
    self.driver.get_screenshot_as_file("../Src/test.png")
    return True
def SetDriverUrl(self, url):
    LogManager.PrintLogMessage("WebCrawler", "SetDriverUrl", "moving to " + url, DefineManager.LOG_LEVEL_INFO)
    try:
        self.driver.get(url)
        self.driver.implicitly_wait(3)
        return True
    except:
        LogManager.PrintLogMessage("WebCrawler", "SetDriverUrl", "connection failed " + url,
                                   DefineManager.LOG_LEVEL_ERROR)
        return False
def SwitchToDefault(self):
    try:
        self.driver.switch_to.default_content()
        LogManager.PrintLogMessage("WebCrawler", "SwitchToDefault", "frame switched", DefineManager.LOG_LEVEL_INFO)
        return self.driver
    except:
        LogManager.PrintLogMessage("WebCrawler", "SwitchToDefault", "frame not switched", DefineManager.LOG_LEVEL_ERROR)
        return None
def SwitchToFrame(self, frameTarget):
    try:
        self.driver.switch_to.frame(frameTarget)
        LogManager.PrintLogMessage("WebCrawler", "SwitchToFrame", "frame switched", DefineManager.LOG_LEVEL_INFO)
        return self.driver
    except:
        LogManager.PrintLogMessage("WebCrawler", "SwitchToFrame", "frame not switched", DefineManager.LOG_LEVEL_ERROR)
        return None
def __init__(self): LogManager.PrintLogMessage("WebCrawler", "__init__", "open chrome browser", DefineManager.LOG_LEVEL_INFO) try: options = webdriver.ChromeOptions() # options.add_argument('headless') options.add_argument('window-size=1920x1080') # options.add_argument("disable-gpu") self.driver = webdriver.Chrome(chrome_options=options) self.driverStatus = True except: LogManager.PrintLogMessage("WebCrawler", "__init__", "cannot open chrome browser", DefineManager.LOG_LEVEL_ERROR) self.driverStatus = False
def ClickElement(self, clickTarget):
    LogManager.PrintLogMessage("WebCrawler", "ClickElement", "try to click target", DefineManager.LOG_LEVEL_INFO)
    try:
        clickTarget.click()
        self.driver.implicitly_wait(DefineManager.DELAY)
        LogManager.PrintLogMessage("WebCrawler", "ClickElement", "target clicked", DefineManager.LOG_LEVEL_INFO)
    except:
        LogManager.PrintLogMessage("WebCrawler", "ClickElement", "cannot click target", DefineManager.LOG_LEVEL_ERROR)
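# Hedged usage sketch, not part of the original sources: it exercises only the WebCrawler
# methods shown above (the constructor, TakePicture, and CloseDriver). The import path and
# the URL are assumptions made for this example; TakePicture itself calls SetDriverUrl and
# skips the screenshot when Chrome failed to open.
if __name__ == "__main__":
    import WebCrawler  # assumed module layout

    crawler = WebCrawler.WebCrawler()                # opens Chrome via Selenium
    crawler.TakePicture("http://finance.naver.com")  # navigate and save ../Src/test.png
    crawler.CloseDriver()                            # quit the browser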
def __init__(self, webCrawler, crawlUrl):
    self.webCrawler = webCrawler
    self.crawlUrl = crawlUrl
    urlStatus = str(self.webCrawler.SetDriverUrl(crawlUrl))
    crawlerStatus = str(self.webCrawler.GetDriverStatus())
    msg = "web driver status: " + crawlerStatus + " url status: " + urlStatus
    LogManager.PrintLogMessage("CrawlDetailInfo", "__init__", msg, DefineManager.LOG_LEVEL_INFO)
def CrawlCompanyName(self):
    try:
        webDriver = self.webCrawler.GetDriver()
        companyElements = webDriver.find_element_by_class_name(DefineManager.COMPANY_INFO_ELEMENTS_CLASS_NAME)
        companyName = companyElements.find_element_by_tag_name("a").text
        LogManager.PrintLogMessage("CrawlBasicInfo", "CrawlCompanyName",
                                   "crawl company name successfully: " + companyName,
                                   DefineManager.LOG_LEVEL_INFO)
        return companyName
    except:
        LogManager.PrintLogMessage("CrawlBasicInfo", "CrawlCompanyName", "crawl company name failed",
                                   DefineManager.LOG_LEVEL_ERROR)
        return None
def __init__(self, targetUrl="", targetDetailUrl=""):
    self.targetUrl = targetUrl
    self.targetDetailUrl = targetDetailUrl
    self.companyCodes = []
    self.crawlDataArray = []
    LogManager.PrintLogMessage("CrawlRoutineManager", "__init__", "init routine manager", DefineManager.LOG_LEVEL_INFO)
def CrawlEPS(self):
    try:
        webDriver = self.webCrawler.GetDriver()

        # Open the item-analysis sub tab, which loads the fundamentals inside the coinfo_cp iframe.
        tabSubMenu = webDriver.find_element_by_class_name(DefineManager.STOCK_TAB_SUB_MENUS_CLASS_NAME)
        menuItems = tabSubMenu.find_elements_by_tag_name(DefineManager.TAG_A)
        self.webCrawler.ClickElement(menuItems[DefineManager.ITEM_ANALYSIS_POINT])
        subHtmlIframe = webDriver.find_element_by_id("coinfo_cp")
        webDriver = self.webCrawler.SwitchToFrame(subHtmlIframe)

        fundamentalTable = webDriver.find_element_by_class_name(DefineManager.FUNDAMENTAL_TABLE_CLASS_NAME)
        fundamentalRows = fundamentalTable.find_elements_by_tag_name(DefineManager.TAG_TR)
        fundamentalEpsRow = fundamentalRows[DefineManager.FUNDAMENTAL_EPS_ROW_POINT]
        fundamentalEpsStr = fundamentalEpsRow.find_elements_by_tag_name(DefineManager.TAG_TD)[DefineManager.TABLE_RIGHT_SIDE].text
        LogManager.PrintLogMessage("CrawlBasicInfo", "CrawlEPS",
                                   "crawl EPS successfully: " + fundamentalEpsStr,
                                   DefineManager.LOG_LEVEL_INFO)

        # Leave the iframe and return to the overview tab so later crawls start from a known state.
        webDriver = self.webCrawler.SwitchToDefault()
        tabSubMenu = webDriver.find_element_by_class_name(DefineManager.STOCK_TAB_SUB_MENUS_CLASS_NAME)
        menuItems = tabSubMenu.find_elements_by_tag_name(DefineManager.TAG_A)
        self.webCrawler.ClickElement(menuItems[DefineManager.TOTAL_INFO_POINT])
        return fundamentalEpsStr
    except:
        LogManager.PrintLogMessage("CrawlBasicInfo", "CrawlEPS", "crawl EPS failed", DefineManager.LOG_LEVEL_ERROR)
        return None
def Crawl3YearsBeforeNetIncome(self):
    try:
        webDriver = self.webCrawler.GetDriver()
        subHtmlIframe = webDriver.find_element_by_id("coinfo_cp")
        webDriver = self.webCrawler.SwitchToFrame(subHtmlIframe)
        financialTable = webDriver.find_element_by_id(DefineManager.FINANCIAL_TABLE_ID_NAME)
        financialRows = financialTable.find_elements_by_tag_name(DefineManager.TAG_TR)
        financialNetIncomeRow = financialRows[DefineManager.FINANCIAL_NET_INCOME_ROW_POINT]
        financialNetIncomeStr = financialNetIncomeRow.find_elements_by_tag_name(DefineManager.TAG_TD)[DefineManager.FINANCIAL_NET_INCOME_3_YEARS_BEFORE_COL_POINT].text
        LogManager.PrintLogMessage("CrawlDetailInfo", "Crawl3YearsBeforeNetIncome",
                                   "crawl 3 years before net income successfully: " + financialNetIncomeStr,
                                   DefineManager.LOG_LEVEL_INFO)
        webDriver = self.webCrawler.SwitchToDefault()
        return financialNetIncomeStr
    except:
        LogManager.PrintLogMessage("CrawlDetailInfo", "Crawl3YearsBeforeNetIncome",
                                   "crawl 3 years before net income failed", DefineManager.LOG_LEVEL_ERROR)
        return None
def CrawlActQ3(self):
    try:
        webDriver = self.webCrawler.GetDriver()
        subHtmlIframe = webDriver.find_element_by_id("coinfo_cp")
        webDriver = self.webCrawler.SwitchToFrame(subHtmlIframe)
        financialTable = webDriver.find_element_by_id(DefineManager.FINANCIAL_TABLE_ID_NAME)
        financialRows = financialTable.find_elements_by_tag_name(DefineManager.TAG_TR)
        financialActRow = financialRows[DefineManager.FINANCIAL_ACT_ROW_POINT]
        financialActStr = financialActRow.find_elements_by_tag_name(DefineManager.TAG_TD)[DefineManager.FINANCIAL_Q3_COL_POINT].text
        LogManager.PrintLogMessage("CrawlDetailInfo", "CrawlActQ3",
                                   "crawl ACT Q3 successfully: " + financialActStr,
                                   DefineManager.LOG_LEVEL_INFO)
        webDriver = self.webCrawler.SwitchToDefault()
        return financialActStr
    except:
        LogManager.PrintLogMessage("CrawlDetailInfo", "CrawlActQ3", "crawl ACT Q3 failed",
                                   DefineManager.LOG_LEVEL_ERROR)
        return None
def CrawlMarketCapitalization(self):
    try:
        webDriver = self.webCrawler.GetDriver()
        marketCapitalizationTable = webDriver.find_element_by_class_name(DefineManager.MARKET_CAPITALIZATION_CLASS_NAME)
        marketCapitalizationStr = marketCapitalizationTable.find_element_by_tag_name(DefineManager.TAG_TD).text
        LogManager.PrintLogMessage("CrawlBasicInfo", "CrawlMarketCapitalization",
                                   "crawl market capitalization successfully: " + marketCapitalizationStr,
                                   DefineManager.LOG_LEVEL_INFO)
        return marketCapitalizationStr
    except:
        LogManager.PrintLogMessage("CrawlBasicInfo", "CrawlMarketCapitalization",
                                   "crawl market capitalization failed", DefineManager.LOG_LEVEL_ERROR)
        return None
def CrawlYearBeta(self):
    try:
        webDriver = self.webCrawler.GetDriver()
        tabSubMenu = webDriver.find_element_by_class_name(DefineManager.STOCK_TAB_SUB_MENUS_CLASS_NAME)
        menuItems = tabSubMenu.find_elements_by_tag_name(DefineManager.TAG_A)
        self.webCrawler.ClickElement(menuItems[DefineManager.ITEM_ANALYSIS_POINT])
        subHtmlIframe = webDriver.find_element_by_id("coinfo_cp")
        webDriver = self.webCrawler.SwitchToFrame(subHtmlIframe)
        priceQuoteTable = webDriver.find_element_by_id(DefineManager.STOCK_QUOTE_TABLE_ID_NAME)
        yearBetaRow = priceQuoteTable.find_elements_by_tag_name(DefineManager.TAG_TR)[DefineManager.YEAR_BETA_ROW_POINT]
        yearBetaStr = yearBetaRow.find_element_by_tag_name(DefineManager.TAG_TD).text
        LogManager.PrintLogMessage("CrawlBasicInfo", "CrawlYearBeta",
                                   "crawl year beta successfully: " + yearBetaStr,
                                   DefineManager.LOG_LEVEL_INFO)
        webDriver = self.webCrawler.SwitchToDefault()
        tabSubMenu = webDriver.find_element_by_class_name(DefineManager.STOCK_TAB_SUB_MENUS_CLASS_NAME)
        menuItems = tabSubMenu.find_elements_by_tag_name(DefineManager.TAG_A)
        self.webCrawler.ClickElement(menuItems[DefineManager.TOTAL_INFO_POINT])
        return yearBetaStr
    except:
        LogManager.PrintLogMessage("CrawlBasicInfo", "CrawlYearBeta", "crawl year beta failed",
                                   DefineManager.LOG_LEVEL_ERROR)
        return None
def GetTodayWeather(webCrawler):
    if not webCrawler.SetDriverUrl("http://openweathermap.org/city/1835848"):
        return None
    webDriver = webCrawler.GetDriver()
    weatherWidgetClass = webDriver.find_element_by_class_name("weather-widget__main")
    todayWeather = weatherWidgetClass.text
    if todayWeather is not None:
        LogManager.PrintLogMessage("Weather", "GetTodayWeather", "today is " + todayWeather,
                                   DefineManager.LOG_LEVEL_INFO)
    return todayWeather
def CrawlDividendYield(self):
    try:
        webDriver = self.webCrawler.GetDriver()
        sideTab = webDriver.find_element_by_class_name(DefineManager.STOCK_SIDE_TAB_CLASS_NAME)
        perEpsTable = sideTab.find_element_by_class_name(DefineManager.STOCK_PER_EPS_CLASS_NAME)
        dividendYieldRow = perEpsTable.find_elements_by_tag_name(DefineManager.TAG_TR)[DefineManager.DIVIDEND_YIELD_ROW_POINT]
        dividendYieldPercent = dividendYieldRow.find_element_by_id(DefineManager.STOCK_DIVIDEND_YIELD_ID_NAME).text
        LogManager.PrintLogMessage("CrawlBasicInfo", "CrawlDividendYield",
                                   "crawl dividend yield successfully: " + dividendYieldPercent,
                                   DefineManager.LOG_LEVEL_INFO)
        return dividendYieldPercent
    except:
        LogManager.PrintLogMessage("CrawlBasicInfo", "CrawlDividendYield", "crawl dividend yield failed",
                                   DefineManager.LOG_LEVEL_ERROR)
        return None
def __init__(self): LogManager.PrintLogMessage("ExportDataManager", "__init__", "init", DefineManager.LOG_LEVEL_INFO) self.dataArray = [] return
def SetDataArray(self, dataArray):
    LogManager.PrintLogMessage("ExportDataManager", "SetDataArray", "setup data array", DefineManager.LOG_LEVEL_INFO)
    self.dataArray = dataArray
def OpenWebDriver(self):
    LogManager.PrintLogMessage("CrawlRoutineManager", "OpenWebDriver", "open web driver", DefineManager.LOG_LEVEL_INFO)
    self.webCrawler = WebCrawler.WebCrawler()
def SetCrawlCompanyCode(self, companyCodes):
    LogManager.PrintLogMessage("CrawlRoutineManager", "SetCrawlCompanyCode", "setup companyCodes", DefineManager.LOG_LEVEL_INFO)
    self.companyCodes = companyCodes
def CloseWebDriver(self):
    LogManager.PrintLogMessage("CrawlRoutineManager", "CloseWebDriver", "shut down web driver", DefineManager.LOG_LEVEL_INFO)
    self.webCrawler.CloseDriver()
def GetCrawlDataArray(self):
    LogManager.PrintLogMessage("CrawlRoutineManager", "GetCrawlDataArray", "return crawled data array", DefineManager.LOG_LEVEL_INFO)
    return self.crawlDataArray
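# Hedged end-to-end sketch, not part of the original sources: it wires together only
# methods defined above (OpenWebDriver, SetCrawlCompanyCode, RunCrawling,
# GetCrawlDataArray, CloseWebDriver, and ExportDataManager.SetDataArray). The module
# import paths and the company codes are assumptions made for this example; how
# ExportDataManager finally writes the data out is not shown here.
if __name__ == "__main__":
    import CrawlRoutineManager  # assumed module layout
    import ExportDataManager    # assumed module layout

    routineManager = CrawlRoutineManager.CrawlRoutineManager()
    routineManager.OpenWebDriver()                                # launch Chrome
    routineManager.SetCrawlCompanyCode(["005930", "000660"])      # placeholder company codes
    routineManager.RunCrawling()                                  # crawl each company's main and detail pages

    exportManager = ExportDataManager.ExportDataManager()
    exportManager.SetDataArray(routineManager.GetCrawlDataArray())  # hand crawled rows to the exporter

    routineManager.CloseWebDriver()                               # shut down Chrome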