def test_is_first_month_in_quarter(self):
    """Check is_first_month_in_quarter against a table of known answers.

    Each key is a month number and each value is the boolean the helper is
    expected to return for that month.
    """
    answers = {
        1: True,
        2: False,
        3: False,
        4: True,
        5: False,
        6: False,
        7: True,
    }
    # items() (not iteritems()) so the test runs on both Python 2 and 3.
    for month, is_first in answers.items():
        self.assertEqual(
            is_first_month_in_quarter(month),
            is_first,
            # Name the month so a failure points at the offending input.
            msg="Unexpected result for month %s" % month
        )
def _download(self, request_dict=None):
    """Download the page, silently tolerating 404s early in a quarter.

    Overrides the download function so that we can catch 404 errors
    silently. This is necessary because these web pages simply do not exist
    for several days at the beginning of each quarter.

    :param request_dict: Forwarded to the parent ``_download``. Defaults to
        a fresh empty dict for each call.
    :return: The parent's download result, or an empty
        ``<html><body></body></html>`` tree when a 404 is received during
        the first 15 days of the first month of a quarter.
    :raises HTTPError: For any HTTP error other than a 404 inside that
        window; the original exception is re-raised unchanged.
    """
    # Avoid a shared mutable default; build a new dict per call instead.
    if request_dict is None:
        request_dict = {}
    try:
        # Forward the caller's request_dict rather than dropping it.
        return super(Site, self)._download(request_dict)
    except HTTPError as e:
        # Read the date once so day and month come from the same instant.
        today = date.today()
        is_first_days_of_the_quarter = (
            today.day <= 15 and is_first_month_in_quarter(today.month)
        )
        got_404 = e.response.status_code == 404
        if not (got_404 and is_first_days_of_the_quarter):
            # Bare raise keeps the original traceback intact.
            raise
        # Do nothing; abort the crawler
        self.status = 200
        # We need the body tag here so that xpath works elsewhere.
        return html.fromstring('<html><body></body></html>')
def _download(self, request_dict=None):
    """Fetch the page, treating an early-quarter 404 as an empty result.

    Overrides the download function so that we can catch 404 errors
    silently. This is necessary because these web pages simply do not exist
    for several days at the beginning of each quarter.

    :param request_dict: Passed on to the parent ``_download``. When
        omitted, a new empty dict is used for the call.
    :return: Whatever the parent ``_download`` returns, or a bare
        ``<html><body></body></html>`` tree if the request 404s within the
        first 15 days of a quarter's first month.
    :raises HTTPError: Re-raised as-is when the error is not a 404 or falls
        outside that window.
    """
    # A None sentinel replaces the mutable ``{}`` default argument.
    if request_dict is None:
        request_dict = {}
    try:
        # The caller's request_dict is handed to the parent, not discarded.
        return super(Site, self)._download(request_dict)
    except HTTPError as e:
        # One date lookup keeps the day/month checks consistent.
        today = date.today()
        is_first_days_of_the_quarter = (
            today.day <= 15 and is_first_month_in_quarter(today.month)
        )
        got_404 = (e.response.status_code == 404)
        if got_404 and is_first_days_of_the_quarter:
            # Do nothing; abort the crawler
            self.status = 200
            # We need the body tag here so that xpath works elsewhere.
            html_tree = html.fromstring('<html><body></body></html>')
            return html_tree
        # Re-raise with the original traceback preserved.
        raise