def ocr_url(identifier):
    """Build the URL from which OCR data for an article can be fetched.

    The URL is ``ocr_base_url`` followed by everything after the first
    ``'?'`` in ``identifier``.

    :param identifier: article identifier string; the part after the first
        ``'?'`` is appended to ``ocr_base_url``.
    :returns: the OCR URL, or an empty string (after logging an error) when
        ``identifier`` contains no ``'?'``.
    """
    # Split on the FIRST '?' only, so a query string that itself contains
    # a '?' is kept whole instead of being truncated at the second one.
    _, separator, query = identifier.partition('?')
    if separator:
        return ocr_base_url + query

    log.error('Could not generate OCR link for identifier %s', identifier)
    return ''
def article_ocr(identifier):
    """Retrieve the OCR full text for the article with the given identifier.

    :param identifier: article identifier, passed to ``DelpherAPI.ocr_url``
        to build the request URL.
    :returns: the text of all entries of the response except ``'title'``,
        ordered by sorted key and separated by blank lines; or the string
        ``'<failed to load>'`` when no URL could be built, the request
        raised, or the request returned ``None``.
    """
    failed = '<failed to load>'

    url = DelpherAPI.ocr_url(identifier)
    if not url:
        # ocr_url returns '' (and has already logged the problem) when it
        # cannot build a link; there is nothing meaningful to request.
        return failed

    try:
        response = request.get(url)
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate to the caller.
        log.exception('Could not get OCR data for url %s.', url)
        return failed

    if response is None:
        log.error('Did not get OCR data for url %s.', url)
        return failed

    # NOTE(review): response is assumed to be a mapping whose values are
    # paragraph strings, with the title under the 'title' key -- confirm
    # against request.get.
    # Each non-title entry is one paragraph; sorted keys fix their order.
    return "\n\n".join(
        response[key] for key in sorted(response) if key != 'title'
    )