class YahooScraper:
    """Scrape stock quotes from Yahoo Finance pages and store them via the DB adapter.

    For every stock code returned by the DB adapter, the quote page is
    downloaded, the currency, quote amount, variation and update time are
    parsed into a ``DailyQuote`` and the result is handed back to the adapter.
    """

    SCRAPER_CODE = "YAHOO"

    # Seconds to wait for the remote server before a request is abandoned.
    # Without a timeout, requests may block forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self, dbConnection):
        """Store the page URL template and build the DB adapter.

        :param dbConnection: connection object passed to ``YahooDBAdapter``.
        """
        # URL template: the stock code replaces %s
        self._mainUrl = "https://finance.yahoo.com/q/hp?s=%s"
        self.dbAdapter = YahooDBAdapter(dbConnection)

    def parseQuoteAmount(self, divQuoteSummary, dailyQuote):
        """Read the quote amount from the summary <div> into ``dailyQuote.amount``.

        :param divQuoteSummary: parsed ``yfi_rt_quote_summary_rt_top`` element.
        :param dailyQuote: datamodel object receiving the ``amount`` attribute.
        :raises ValueError: if the quote <span> is not present.
        """
        # <div class="yfi_rt_quote_summary_rt_top">
        #   ...
        #   1) the stock quote
        #   <span class="time_rtq_ticker"><span ...>13.64</span></span>
        spanQuote = divQuoteSummary.find("span", class_="time_rtq_ticker")
        if spanQuote is None:
            raise ValueError("quote <span class='time_rtq_ticker'> not found")
        quoteAmount = spanQuote.get_text(strip=True)

        # set the value in the datamodel
        dailyQuote.amount = StringUtils.stringToCentValue(str(quoteAmount))

    def parseQuoteVariations(self, divQuoteSummary, dailyQuote):
        """Read gain/loss sign, amount and percentage into ``dailyQuote``.

        Sets ``isGain``, ``variation`` and ``variationPercentage``.

        :param divQuoteSummary: parsed ``yfi_rt_quote_summary_rt_top`` element.
        :param dailyQuote: datamodel object receiving the three attributes.
        :raises ValueError: if the variation <span> or its two child <span>
            elements are not present.
        """
        # <div class="yfi_rt_quote_summary_rt_top">
        #   ...
        #   2) the gain/loss amount
        #   <span class="down_r time_rtq_content">
        #     <span ...><img ... alt="Down"><span ... >0.36</span></span>
        #     <span ...><span ...>(2.57%)</span></span>
        #   </span>
        spanVariation = divQuoteSummary.find("span", class_="time_rtq_content")
        if spanVariation is None:
            raise ValueError("variation <span class='time_rtq_content'> not found")

        # get the sign; anything other than alt="Down" counts as a gain.
        # NOTE(review): a missing arrow image is treated like a non-"Down"
        # value - presumably the page omits it for unchanged quotes; confirm.
        imgVariation = spanVariation.find("img")
        altAttr = imgVariation.get("alt") if imgVariation is not None else None

        # get the amount and the percentage.
        # Only DIRECT children are taken: a recursive search would also return
        # the nested inner <span> of the amount, so index 1 would be the
        # amount again instead of the percentage.
        spanVariationList = spanVariation.find_all("span", recursive=False)
        if len(spanVariationList) < 2:
            raise ValueError("variation amount/percentage <span> elements not found")
        quoteVar = spanVariationList[0].get_text(strip=True)
        quoteVarPercentage = (
            spanVariationList[1]
            .get_text(strip=True)
            .replace('(', '')
            .replace(')', '')
            .replace('%', '')
        )

        # set the values in the datamodel
        dailyQuote.isGain = altAttr != "Down"
        dailyQuote.variation = StringUtils.stringToCentValue(str(quoteVar))
        dailyQuote.variationPercentage = StringUtils.stringToCentValue(str(quoteVarPercentage))

    def parseQuoteUpdateTime(self, divQuoteSummary, dailyQuote):
        """Set ``dailyQuote.updateTime`` to the current local time.

        :param divQuoteSummary: currently unused (see TODO).
        :param dailyQuote: datamodel object receiving ``updateTime``.
        """
        # TODO really parse the update time
        dailyQuote.updateTime = datetime.now()

    def parseCurrency(self, htmlRoot, dailyQuote):
        """Read the currency code from the page disclaimer into ``dailyQuote.currency``.

        :param htmlRoot: parsed page root.
        :param dailyQuote: datamodel object receiving ``currency``.
        :raises ValueError: if the disclaimer paragraph is missing or its text
            does not have the form ``Currency in XXX.``.
        """
        # <p class="yfi_disclaimer">Currency in EUR.</p>
        pDisclaimer = htmlRoot.find("p", class_="yfi_disclaimer")
        if pDisclaimer is None:
            raise ValueError("currency <p class='yfi_disclaimer'> not found")

        # surrounding whitespace is irrelevant to the currency code
        disclaimerText = pDisclaimer.get_text().strip()
        # raw string: \w and \. are regex escapes, not string escapes
        match = re.match(r"^Currency in (\w+)\.$", disclaimerText)
        if match is None:
            raise ValueError("unexpected currency disclaimer: %r" % disclaimerText)

        # set the value in the datamodel
        dailyQuote.currency = str(match.group(1))

    def scrape(self):
        """Scrape every stock code known to the DB adapter and store the quotes.

        Progress is printed to standard output. Any error raised while
        scraping or storing a code propagates and stops the run.
        """
        import sys  # local import: only needed for the progress output

        # load the stock codes to process
        stockCodeList = self.dbAdapter.selectStockCodeList()
        listSize = len(stockCodeList)
        print("There are %s stock codes to process" % listSize)

        # for every code
        for index, stockCode in enumerate(stockCodeList, 1):
            # progress prefix without newline; "DONE" completes the line.
            # Flushed so it is visible while the download is running.
            sys.stdout.write("[%d/%d] Scraping %s code... " % (index, listSize, stockCode))
            sys.stdout.flush()

            # perform a scrape
            dailyQuote = self.scrapeStockCode(stockCode)
            # update the quotation
            self.dbAdapter.updateQuotation(dailyQuote)
            print("DONE")

    def scrapeStockCode(self, stockCode):
        """Download and parse the quote page of one stock code.

        :param stockCode: code substituted into the page URL template.
        :returns: a ``DailyQuote`` filled with currency, amount, variation
            and update time.
        :raises ValueError: if an expected page element is missing.

        HTTP error statuses are raised by ``raise_for_status()`` instead of
        being parsed as if they were a quote page.
        """
        # get the html page via requests
        htmlPage = requests.get(self._mainUrl % stockCode, timeout=self.REQUEST_TIMEOUT)
        htmlPage.raise_for_status()

        # process the page via soup
        htmlRoot = BeautifulSoup(htmlPage.text, "lxml")
        dailyQuote = DailyQuote(stockCode, self.SCRAPER_CODE)

        # <html> containing the currency
        self.parseCurrency(htmlRoot, dailyQuote)

        # <div> containing the main values
        divQuoteSummary = htmlRoot.find("div", class_="yfi_rt_quote_summary_rt_top")
        if divQuoteSummary is None:
            raise ValueError("quote summary <div> not found for %s" % stockCode)
        self.parseQuoteAmount(divQuoteSummary, dailyQuote)
        self.parseQuoteVariations(divQuoteSummary, dailyQuote)
        self.parseQuoteUpdateTime(divQuoteSummary, dailyQuote)

        return dailyQuote
def __init__(self, dbConnection):
    """Store the quote-page URL template and build the DB adapter.

    :param dbConnection: connection object passed to ``YahooDBAdapter``.
    """
    # NOTE(review): this definition is identical to YahooScraper.__init__
    # earlier in the file - looks like a stray duplicate; confirm whether it
    # can be removed.
    # URL template: the stock code replaces %s
    self._mainUrl = "https://finance.yahoo.com/q/hp?s=%s"
    self.dbAdapter = YahooDBAdapter(dbConnection)