def handle(self, *args, **options):
    """Entry point for the discovery command.

    Runs a full Discovery job and emails its report. Any failure is
    logged with a traceback and reported via the error email instead of
    crashing the command.
    """
    try:
        print('\nRunning discovery. Logging to logs/%s.log\n' % time.strftime('%Y%m%d'))
        discovery_job = Discovery()
        discovery_job.run()
        discovery_job.send_email_report()
    except Exception as exc:
        # Top-level boundary: record the full traceback, then notify by email.
        Logger.error(traceback.format_exc())
        self.send_error_email('FAILED DISCOVER ERROR: %s' % exc)
def ingest_new_citations(self):
    """Download, scrape, and ingest citations from each newly discovered opinion.

    Opinions whose scrape fails are recorded in ``self.failed_scrapes``
    and skipped; successful scrapes with extracted URLs have their
    citations ingested and counted in ``self.ingested_citations_count``.
    """
    for opinion in self.new_opinions:
        Logger.info('Downloading: %s %s' % (opinion.name, opinion.pdf_url))
        opinion.download()
        Logger.info('Scraping: %s %s' % (opinion.name, opinion.local_pdf))
        try:
            opinion.scrape()
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate. Log the traceback and record the failure.
            Logger.error(traceback.format_exc())
            self.failed_scrapes.append(opinion.name)
            # Skip ingestion: after a failed scrape, opinion.pdf is not
            # reliably populated and ingesting its citations is unsafe.
            continue
        if opinion.pdf.urls:
            Logger.info('Ingesting citations from %s' % opinion.name)
            opinion.ingest_citations()
            self.ingested_citations_count += opinion.ingested_citation_count
def get(cls, url=False, err=True):
    """Fetch ``url`` with the class's headers/timeout, rate-limited by cls.SLEEP.

    A scheme-less URL is prefixed with ``http://``. Returns the
    ``requests`` response on success, or ``False`` when no URL is given
    or the request fails; failures are logged unless ``err`` is False.
    """
    if url:
        # Rate-limit: wait between requests (cls.SLEEP seconds).
        sleep(cls.SLEEP)
        check = urlparse(url)
        if not check.scheme:
            url = 'http://' + url
        try:
            return requests.get(url, headers=cls.HEADERS, timeout=cls.TIMEOUT,)
        except requests.exceptions.RequestException:
            # Narrowed from `except Exception` so only network/HTTP-level
            # failures fall through to the error log; programming errors
            # (e.g. a malformed HEADERS dict) now surface instead of being
            # silently swallowed.
            pass
    if err:
        Logger.error('Fetching failed for: %s' % url)
    return False
def get(cls, url=False, err=True):
    """Rate-limited HTTP GET helper.

    Sleeps cls.SLEEP seconds before fetching, defaults the scheme to
    ``http://`` when missing, and returns the response object. Returns
    ``False`` — logging via Logger unless ``err`` is False — when no URL
    was supplied or the request raised.
    """
    if not url:
        if err:
            Logger.error('Fetching failed for: %s' % url)
        return False
    # Wait 2 seconds between requests
    sleep(cls.SLEEP)
    parsed = urlparse(url)
    if not parsed.scheme:
        url = 'http://' + url
    try:
        response = requests.get(url, headers=cls.HEADERS, timeout=cls.TIMEOUT)
        return response
    except Exception:
        pass
    if err:
        Logger.error('Fetching failed for: %s' % url)
    return False