class YahooArticleGetter(object):
    """
    Download articles from Yahoo Finance.

    Walks over the companies returned by the database model, fetches the
    headlines of each one and logs the outcome of the run.
    """

    def __init__(self):
        """Set up the headlines URL, the helper objects and the regex patterns."""
        self.headlines_url = 'http://finance.yahoo.com/q/h?s='
        self.db_model = YahooDbModel()
        self.article_parser = ArticleParser()
        # Becomes True once a company fails; handed to add_log_exec() at the end.
        self.exec_error = False
        self.parse_datetime = False
        # Precompiled patterns. Raw strings keep \d and \w intact for the regex
        # engine instead of relying on unrecognised string escapes.
        # NOTE(review): the dots of the host name are unescaped and therefore
        # match any character -- confirm whether that is intended.
        self.native_p = re.compile(r'^http://finance.yahoo.com/news/.+')
        self.h_time_p = re.compile(r'.+ (\d+:\d+\w\w) .*')

    #### METHOD 1: get new articles

    def get_new_articles(self):
        """
        Main method for getting and saving new articles.

        Calls get_headlines() for every company from the database model.
        The first exception ends the loop: it is printed, reported through
        __send_serious_error() and recorded in exec_error. The execution is
        logged afterwards whether or not an error occurred.
        """
        # Browse through all companies.
        for company in self.db_model.get_companies():
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print("====%d: %s====" % (company['id'], company['ticker']))
            # Get headlines and process so far unsaved articles.
            try:
                self.get_headlines(company['ticker'], company['id'], company['article_newest_saved'])
            except Exception as e:
                self.exec_error = True
                print("serious error: " + repr(e))
                self.__send_serious_error(e)
                break   # end script
        # Log execution.
        # NOTE(review): 4 is presumably the identifier of this job in the
        # execution log -- confirm against YahooDbModel.add_log_exec.
        self.db_model.add_log_exec(4, self.exec_error)
 def __init__(self):
     """Set up the headlines URL, the helper objects and the regex patterns."""
     self.headlines_url = 'http://finance.yahoo.com/q/h?s='
     self.db_model = YahooDbModel()
     self.article_parser = ArticleParser()
     self.exec_error = False
     self.parse_datetime = False
     # Precompiled patterns. Raw strings keep \d and \w intact for the regex
     # engine instead of relying on unrecognised string escapes.
     self.native_p = re.compile(r'^http://finance.yahoo.com/news/.+')
     self.h_time_p = re.compile(r'.+ (\d+:\d+\w\w) .*')
 def __init__(self, fb_config, tw_config):
     """
     Prepare the Yahoo URLs, the storage/parsing helpers and the API clients.

     fb_config -- mapping whose 'access_token' item goes to facebook.GraphAPI
     tw_config -- mapping whose 'app_key' and 'access_token' items go to
                  twython.Twython
     """
     self.headlines_url = 'http://finance.yahoo.com/quote/'
     self.db_model = YahooDbModel()
     self.article_parser = ArticleParser()
     self.exec_error = False

     # Share count: Facebook client first, then Twitter.
     fb_token = fb_config['access_token']
     self.fb_api = facebook.GraphAPI(fb_token, version='2.7')
     tw_credentials = {
         'app_key': tw_config['app_key'],
         'access_token': tw_config['access_token'],
     }
     self.tw_api = twython.Twython(**tw_credentials)

     # Yahoo comments: URL template with {com_count} and {yahoo_uuid}
     # placeholders.
     com_url_pieces = [
         'http://finance.yahoo.com/_finance_doubledown/api/resource/CommentsService.comments;count={com_count};',
         'publisher=finance-en-US;sortBy=highestRated;uuid={yahoo_uuid}?',
         'bkt=fintest008&device=desktop&feature=&intl=us&lang=en-US&partner=none&region=US&site=finance&',
         'tz=Europe%2FPrague&ver=0.101.427&returnMeta=true',
     ]
     self.com_url_template = ''.join(com_url_pieces)
class YahooArticleGetter(object):
    """
    Download articles from Yahoo Finance.
    """

    def __init__(self, fb_config, tw_config):
        """
        Prepare the Yahoo URLs, the storage/parsing helpers and the API clients.

        fb_config -- mapping whose 'access_token' item goes to facebook.GraphAPI
        tw_config -- mapping whose 'app_key' and 'access_token' items go to
                     twython.Twython
        """
        # Base address of a company page; get_headlines() appends the ticker.
        self.headlines_url = 'http://finance.yahoo.com/quote/'
        self.db_model = YahooDbModel()
        self.article_parser = ArticleParser()
        self.exec_error = False

        # Share count: Facebook client first, then Twitter.
        fb_token = fb_config['access_token']
        self.fb_api = facebook.GraphAPI(fb_token, version='2.7')
        tw_credentials = {
            'app_key': tw_config['app_key'],
            'access_token': tw_config['access_token'],
        }
        self.tw_api = twython.Twython(**tw_credentials)

        # Yahoo comments: URL template with {com_count} and {yahoo_uuid}
        # placeholders.
        com_url_pieces = (
            'http://finance.yahoo.com/_finance_doubledown/api/resource/CommentsService.comments;count={com_count};',
            'publisher=finance-en-US;sortBy=highestRated;uuid={yahoo_uuid}?',
            'bkt=fintest008&device=desktop&feature=&intl=us&lang=en-US&partner=none&region=US&site=finance&',
            'tz=Europe%2FPrague&ver=0.101.427&returnMeta=true',
        )
        self.com_url_template = ''.join(com_url_pieces)

    #### METHOD 1: get new articles
        
    def get_new_articles(self, company_sleep=(10, 15)):
        """
        Main method for getting and saving new articles for all companies.

        company_sleep -- (min, max) bounds, in seconds, of the random pause
                         taken after each company that was processed without
                         an exception.

        An exception raised for one company does not stop the run: it is
        printed, reported through __send_serious_error(), exec_error is set
        and the loop moves on to the next company (without pausing).
        """
        for company in self.db_model.get_companies():
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print("====%d: %s====" % (company['id'], company['ticker']))
            try:
                self.get_headlines(company['ticker'], company['id'], company['article_newest_saved'])
                time.sleep(random.uniform(company_sleep[0], company_sleep[1]))
            except Exception:
                self.exec_error = True
                # Format the traceback once so the printed and the reported
                # text are guaranteed to be the same.
                error_trace = traceback.format_exc()
                print("serious error: {0}".format(error_trace))
                self.__send_serious_error(error_trace)

    
    def get_headlines(self, ticker, company_id, last_date_in_db):
        """
        Get headlines and save new articles for given company.

        ticker -- ticker symbol; appended to headlines_url to build the page URL
        company_id -- passed on to the article processing and to
                      db_model.update_last_download()
        last_date_in_db -- passed on to the article processing

        Returns False when the page could not be loaded, the ticker does not
        exist, the embedded app data is missing or the article stream is not
        found under the expected keys.
        """
        # Get ticker page
        ticker_url = self.headlines_url + ticker
        # NOTE(review): the meaning of the True and 5 arguments is defined by
        # _get_content_from_url -- confirm there.
        page_html = self._get_content_from_url(ticker_url, True, 5)
        # Check if ticker page exists.
        if not page_html:
            # Format inside the call: "print(...) % ticker" only works as a
            # Python 2 print statement.
            print('Headlines for %s could not be loaded.' % ticker)
            return False
        # First element of the page content; an empty <title> marks an
        # unknown ticker.
        header_line = page_html[0]
        if '<title></title>' in header_line:
            self.exec_error = True
            print("Ticker %s does not exist." % ticker)
            self.__send_ticker_error(ticker)
            return False
        # Find App Data
        app_data = self._try_to_get_appdata(ticker_url, page_html)
        if not app_data:
            self.exec_error = True
            msg = 'JSON data was not found (ticker %s).'
            print(msg % ticker)
            self.__send_serious_error(msg % ticker)
            return False
        # Get the articles.
        # A KeyError on this lookup is not handled here and propagates to
        # the caller.
        page_name = app_data['context']['dispatcher']['stores']['StreamStore']['pageCategory']
        page_field = '%s.mega' % page_name
        try:
            articles = app_data['context']['dispatcher']['stores']['StreamStore']['streams'][page_field]['data']['stream_items']
        except KeyError as e:
            print("Page key error:" + str(e))
            return False
        # Process all articles (from oldest to newest, 10 articles into history).
        for art in reversed(articles):
            self.__process_article_from_list(art, company_id, last_date_in_db)
        # Commit inserts and update newest saved article datetime.
        self.db_model.update_last_download(company_id)