Example #1
0
class YahooScraper:
    """Scrape stock quotes from Yahoo Finance pages and store them via the DB adapter.

    For every stock code returned by the DB adapter, the quote page is
    downloaded, parsed into a ``DailyQuote`` and handed back to the adapter.
    """

    SCRAPER_CODE = "YAHOO"
    # Seconds to wait for Yahoo before giving up on a single request.
    REQUEST_TIMEOUT = 30

    def __init__(self, dbConnection):
        """Create the scraper.

        dbConnection is passed unchanged to ``YahooDBAdapter``.
        """
        # URL template; "%s" is replaced with the stock code in scrapeStockCode
        self._mainUrl = "https://finance.yahoo.com/q/hp?s=%s"
        self.dbAdapter = YahooDBAdapter(dbConnection)

    def _findRequired(self, parent, tagName, cssClass):
        """Return the first ``tagName`` element with class ``cssClass`` under ``parent``.

        Raises:
            ValueError: if ``parent`` is None or no such element exists, so a
                changed page layout is reported by name instead of failing
                later with an attribute error on None.
        """
        element = None
        if parent is not None:
            element = parent.find(tagName, class_=cssClass)
        if element is None:
            raise ValueError(
                'Yahoo page is missing <%s class="%s">' % (tagName, cssClass))
        return element

    def parseQuoteAmount(self, divQuoteSummary, dailyQuote):
        """Read the stock quote from the summary <div> into ``dailyQuote.amount``.

        The text is converted with ``StringUtils.stringToCentValue``.

        Raises:
            ValueError: if the quote <span> cannot be found.
        """
        #     <div class="yfi_rt_quote_summary_rt_top">
        #        ...
        #            1) the stock quote
        #            <span class="time_rtq_ticker"><span ...>13.64</span></span>
        spanQuote = self._findRequired(divQuoteSummary, "span", "time_rtq_ticker")
        quoteAmount = spanQuote.get_text(strip=True)

        # set the value in the datamodel
        dailyQuote.amount = StringUtils.stringToCentValue(str(quoteAmount))

    def parseQuoteVariations(self, divQuoteSummary, dailyQuote):
        """Read the gain/loss data from the summary <div> into ``dailyQuote``.

        Sets ``isGain``, ``variation`` and ``variationPercentage``; the two
        numeric values are converted with ``StringUtils.stringToCentValue``.

        Raises:
            ValueError: if the variation <span> or its two child spans
                cannot be found.
        """
        #     <div class="yfi_rt_quote_summary_rt_top">
        #        ...
        #            2) the gain/loss amount
        #            <span class="down_r time_rtq_content">
        #                <span ...><img ... alt="Down"><span ... >0.36</span></span>
        #                <span ...><span ...>(2.57%)</span></span>
        #            </span>
        spanVariation = self._findRequired(divQuoteSummary, "span", "time_rtq_content")

        # get the sign: the arrow image says "Down" for a loss; a missing
        # image or alt attribute is treated as "not a loss"
        imgVariation = spanVariation.find("img")
        altAttr = imgVariation.get("alt") if imgVariation is not None else None
        dailyQuote.isGain = altAttr != "Down"

        # get the amount and the percentage; only the DIRECT child spans are
        # wanted, because a recursive search would also return the nested
        # spans and index 1 would then be the amount again
        spanVariationList = spanVariation.find_all("span", recursive=False)
        if len(spanVariationList) < 2:
            raise ValueError(
                "Expected amount and percentage spans, found %d"
                % len(spanVariationList))
        quoteVar = spanVariationList[0].get_text(strip=True)
        # drop the decoration around the number: "(2.57%)" -> "2.57"
        quoteVarPercentage = re.sub(
            r"[()%]", "", spanVariationList[1].get_text(strip=True))

        # set the values in the datamodel
        dailyQuote.variation = StringUtils.stringToCentValue(str(quoteVar))
        dailyQuote.variationPercentage = StringUtils.stringToCentValue(str(quoteVarPercentage))

    def parseQuoteUpdateTime(self, divQuoteSummary, dailyQuote):
        """Set ``dailyQuote.updateTime`` to the current local time.

        ``divQuoteSummary`` is currently unused.
        """
        # TODO really parse the update time
        dailyQuote.updateTime = datetime.now()

    def parseCurrency(self, htmlRoot, dailyQuote):
        """Read the currency code from the page disclaimer into ``dailyQuote.currency``.

        Raises:
            ValueError: if the disclaimer paragraph is missing or its text is
                not of the form "Currency in XXX.".
        """
        #     <p class="yfi_disclaimer">Currency in EUR.</p>
        pDisclaimer = self._findRequired(htmlRoot, "p", "yfi_disclaimer")
        # tolerate whitespace/newlines around the sentence
        disclaimerText = pDisclaimer.get_text().strip()

        match = re.match(r"^Currency in (\w+)\.$", disclaimerText)
        if match is None:
            raise ValueError(
                "Unexpected currency disclaimer: %r" % disclaimerText)

        # set the value in the datamodel
        dailyQuote.currency = str(match.group(1))

    def scrape(self):
        """Scrape every stock code known to the DB adapter and store the quotes.

        Progress is written to standard output. Any error raised while
        scraping or storing a code propagates and stops the run.
        """
        # sys.stdout.write is used instead of the print statement so the
        # method is valid on both Python 2 and Python 3
        import sys

        # load the stock codes to process
        stockCodeList = self.dbAdapter.selectStockCodeList()
        listSize = len(stockCodeList)

        sys.stdout.write("There are %s stock codes to process\n" % listSize)

        # for every code
        for index, stockCode in enumerate(stockCodeList, 1):
            sys.stdout.write(
                "[%d/%d] Scraping %s code... " % (index, listSize, stockCode))
            # show the progress line before the (slow) download starts
            sys.stdout.flush()
            # perform a scrape
            dailyQuote = self.scrapeStockCode(stockCode)
            # update the quotation
            self.dbAdapter.updateQuotation(dailyQuote)

            sys.stdout.write("DONE\n")

    def scrapeStockCode(self, stockCode):
        """Download and parse the quote page of ``stockCode``.

        Returns:
            A ``DailyQuote`` built from ``stockCode`` and ``SCRAPER_CODE``
            with currency, amount, variation data and update time filled in.

        Raises:
            requests exceptions on timeout, connection failure or an HTTP
            error status; ValueError if the page lacks an expected element.
        """
        # get the html page via requests; the timeout keeps one unresponsive
        # request from blocking the whole run
        htmlPage = requests.get(self._mainUrl % stockCode,
                                timeout=self.REQUEST_TIMEOUT)
        # fail on 4xx/5xx here rather than while parsing an error page
        htmlPage.raise_for_status()
        # process the page via soup
        htmlRoot = BeautifulSoup(htmlPage.text, "lxml")

        dailyQuote = DailyQuote(stockCode, self.SCRAPER_CODE)

        # <html> containing the currency
        self.parseCurrency(htmlRoot, dailyQuote)

        # <div> containing the main values
        divQuoteSummary = htmlRoot.find("div", class_="yfi_rt_quote_summary_rt_top")
        self.parseQuoteAmount(divQuoteSummary, dailyQuote)
        self.parseQuoteVariations(divQuoteSummary, dailyQuote)
        self.parseQuoteUpdateTime(divQuoteSummary, dailyQuote)

        return dailyQuote
Example #2
0
 def __init__(self, dbConnection):
     """Set up the DB adapter and the quote page URL template.

     dbConnection is passed unchanged to YahooDBAdapter.
     """
     # adapter wrapping the supplied connection
     self.dbAdapter = YahooDBAdapter(dbConnection)
     # URL template with a single "%s" placeholder in the "s" query parameter
     self._mainUrl = "https://finance.yahoo.com/q/hp?s=%s"