Example #1
0
 def ocr_url(identifier):
     """Build the URL from which OCR data for an identifier can be fetched.

     The identifier is split on '?', and the segment that follows the first
     '?' (up to a second '?', if any) is appended to ``ocr_base_url``.

     Returns the resulting URL string. If the identifier contains no '?',
     an error is logged and an empty string is returned instead.
     """
     pieces = identifier.split('?')

     # Guard clause: without a '?' there is nothing to append to the base URL.
     if len(pieces) < 2:
         log.error('Could not generate OCR link for identifier ' + identifier)
         return ''

     return ocr_base_url + pieces[1]
Example #2
0
    def article_ocr(identifier):
        """Retrieve the OCR full text for the article with the given identifier.

        The OCR URL is obtained from ``DelpherAPI.ocr_url`` and fetched with
        ``request.get``. The response is treated as a mapping: the values of
        every key except ``'title'`` are joined in sorted key order, separated
        by a blank line.

        Returns the joined text, or the placeholder ``'<failed to load>'`` when
        no URL could be built, the request raised an exception, or the request
        returned ``None``.
        """
        failed = '<failed to load>'

        url = DelpherAPI.ocr_url(identifier)
        if not url:
            # ocr_url signals failure with an empty string (and has already
            # logged it); do not issue a request against an empty URL.
            return failed

        try:
            response = request.get(url)
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must propagate instead of being reported as a
            # failed download.
            log.exception('Could not get OCR data for url {url}.'.format(url=url))
            return failed

        if response is None:
            log.error('Did not get OCR data for url {url}.'.format(url=url))
            return failed

        # Each paragraph is one entry of the response; the title is skipped.
        return "\n\n".join(
            response[key] for key in sorted(response.keys()) if key != 'title'
        )