def create_print_issue_pdf():
    """Create or update the pdf bundles for the current issue.

    For each edition (suffix ``''`` and ``'_mag'``) a web bundle is written
    to a temporary file by ``create_web_bundle`` and then stored on the
    ``PrintIssue`` whose pdf name ends with the edition's output name. A new
    ``PrintIssue`` is created when no such object exists.

    Returns:
        list: The pdf names that were saved. Editions for which
        ``create_web_bundle`` raised ``RuntimeWarning`` are logged and
        left out.
    """
    issue = current_issue()
    editions = [('', PAGES_GLOB), ('_mag', MAG_PAGES_GLOB)]
    results = []
    for suffix, globpattern in editions:
        pdf_name = OUTPUT_PDF_NAME.format(issue=issue, suffix=suffix)
        issue_name = '{issue.number}/{issue.date.year}{suffix}'.format(
            suffix=suffix, issue=issue)
        logger.info('Creating pdf: {}'.format(issue_name))
        # The context manager closes and removes the temporary file at the
        # end of every iteration (including the ``continue`` path) instead
        # of leaving that to garbage collection.
        with tempfile.NamedTemporaryFile(suffix='.pdf') as tmp_bundle_file:
            try:
                create_web_bundle(
                    filename=tmp_bundle_file.name,
                    globpattern=globpattern,
                )
            except RuntimeWarning as warning:
                logger.info(str(warning))
                continue
            try:
                print_issue = PrintIssue.objects.get(
                    pdf__endswith=pdf_name)
            except PrintIssue.DoesNotExist:
                print_issue = PrintIssue()
            # Re-open by name: the bundle was written to the path, not to
            # the handle held by ``tmp_bundle_file``.
            with open(tmp_bundle_file.name, 'rb') as src:
                pdf_content = ContentFile(src.read())
            print_issue.pdf.save(pdf_name, pdf_content, save=False)
            print_issue.save()
        logger.info('New bundle file: {}'.format(pdf_name))
        results.append(pdf_name)
    return results
def create_print_issue_pdf(issue, **kwargs):
    """Create or update the pdf bundles for the given issue.

    For each edition (suffix ``''`` and ``'_mag'``) a web bundle is written
    to a temporary file by ``create_web_bundle`` and then stored on the
    ``PrintIssue`` whose pdf name contains the edition's output name. A new
    ``PrintIssue`` is created when no such object exists.

    Args:
        issue: An ``Issue`` instance, or an ``int`` that is looked up as the
            id of an ``Issue``.
        **kwargs: Passed through unchanged to ``create_web_bundle``.

    Returns:
        list: The pdf names that were saved. Editions for which
        ``create_web_bundle`` raised ``RuntimeWarning`` are logged and
        left out.
    """
    if isinstance(issue, int):
        issue = Issue.objects.get(id=issue)
    editions = [('', PAGES_GLOB), ('_mag', MAG_PAGES_GLOB)]
    results = []
    for suffix, fileglob in editions:
        pdf_name = OUTPUT_PDF_NAME.format(issue=issue, suffix=suffix)
        logger.info('Creating pdf: {}'.format(pdf_name))
        # The context manager closes and removes the temporary file at the
        # end of every iteration (including the ``continue`` path) instead
        # of leaving that to garbage collection.
        with tempfile.NamedTemporaryFile(suffix='.pdf') as tmp_bundle_file:
            try:
                create_web_bundle(
                    issue=issue,
                    filename=tmp_bundle_file.name,
                    fileglob=fileglob,
                    **kwargs,
                )
            except RuntimeWarning as warning:
                logger.info(str(warning))
                continue
            try:
                print_issue = PrintIssue.objects.get(
                    pdf__contains=pdf_name)
            except PrintIssue.DoesNotExist:
                print_issue = PrintIssue()
            # Re-open by name: the bundle was written to the path, not to
            # the handle held by ``tmp_bundle_file``.
            with open(tmp_bundle_file.name, 'rb') as src:
                pdf_content = ContentFile(src.read())
            print_issue.pdf.save(pdf_name, pdf_content, save=False)
            print_issue.issue = issue
            print_issue.save()
        logger.info('New bundle file: {}'.format(pdf_name))
        results.append(pdf_name)
    return results
def create_print_issue_pdf():
    """Create or update the pdf bundles for the current issue.

    For each edition (suffix ``''`` and ``'_mag'``) a web bundle is written
    to a temporary file by ``create_web_bundle`` and then stored on the
    ``PrintIssue`` whose pdf name ends with the edition's output name. A new
    ``PrintIssue`` is created when no such object exists.

    Returns:
        list: The pdf names that were saved. Editions for which
        ``create_web_bundle`` raised ``RuntimeWarning`` are logged and
        left out.
    """
    issue = current_issue()
    editions = [('', PAGES_GLOB), ('_mag', MAG_PAGES_GLOB)]
    results = []
    for suffix, globpattern in editions:
        pdf_name = OUTPUT_PDF_NAME.format(issue=issue, suffix=suffix)
        issue_name = '{issue.number}/{issue.date.year}{suffix}'.format(
            suffix=suffix, issue=issue)
        logger.info('Creating pdf: {}'.format(issue_name))
        # The context manager closes and removes the temporary file at the
        # end of every iteration (including the ``continue`` path) instead
        # of leaving that to garbage collection.
        with tempfile.NamedTemporaryFile(suffix='.pdf') as tmp_bundle_file:
            try:
                create_web_bundle(
                    filename=tmp_bundle_file.name,
                    globpattern=globpattern,
                )
            except RuntimeWarning as warning:
                logger.info(str(warning))
                continue
            try:
                print_issue = PrintIssue.objects.get(
                    pdf__endswith=pdf_name)
            except PrintIssue.DoesNotExist:
                print_issue = PrintIssue()
            # Re-open by name: the bundle was written to the path, not to
            # the handle held by ``tmp_bundle_file``.
            with open(tmp_bundle_file.name, 'rb') as src:
                pdf_content = ContentFile(src.read())
            print_issue.pdf.save(pdf_name, pdf_content, save=False)
            print_issue.save()
        logger.info('New bundle file: {}'.format(pdf_name))
        results.append(pdf_name)
    return results
def _get_content(print_issue: PrintIssue, first_page=2, last_page=999) -> str:
    """Return the text of a pdf issue, or '' when it cannot be read.

    The page range is handed to ``print_issue.get_page_text_content``. Any
    exception raised there is logged with its traceback and swallowed, so
    callers always get a string back.
    """
    try:
        text = print_issue.get_page_text_content(first_page, last_page)
    except Exception:
        # Best effort: an unreadable pdf yields empty content.
        logger.exception('Error reading pdf:')
        text = ''
    return text
def bundle_pdf(for_issue, logger):
    """Find staged pdf files on disk and bundle them into PrintIssue objects.

    For each edition code (1 with no suffix, 2 with ``_mag``) the staged
    files are merged into a single pdf by running the ``PDF_MERGE`` command.
    The result is saved on the ``PrintIssue`` whose pdf name ends with the
    bundle filename; a new ``PrintIssue`` is created when none exists.

    An edition is skipped, with the reason logged, when no files are staged,
    when the number of files is not a multiple of four, or when the merge
    command exits with a non-zero status.

    Args:
        for_issue: Issue object used to fill in ``FILENAME_PATTERN``.
        logger: Logger that receives progress and skip messages.
    """
    for code, suffix in (1, ''), (2, '_mag'):
        filename = FILENAME_PATTERN.format(
            issue=for_issue,
            suffix=suffix,
        )
        files = get_staging_pdf_files(code)
        if not files:
            logger.info('no files found, %s' % code)
            continue
        if len(files) % 4:
            logger.info('Incorrect number of pages (%d), %s' %
                        (len(files), code))
            continue

        pdf_path = os.path.join(PDF_FOLDER, filename)
        args = [PDF_MERGE, pdf_path] + files
        logger.debug('\n'.join(args))
        returncode = subprocess.call(args)
        if returncode != 0:
            # Without this check a failed merge would go on to save
            # whatever file (if any) already sits at ``pdf_path``.
            logger.error('pdf merge failed (exit status %d), %s' %
                         (returncode, code))
            continue

        try:
            issue = PrintIssue.objects.get(pdf__endswith=filename)
        except PrintIssue.DoesNotExist:
            issue = PrintIssue()

        with open(pdf_path, 'rb') as src:
            content = ContentFile(src.read())
        issue.pdf.save(filename, content)
def test_create_printissue(fixture_pdf, settings, tempdir):
    """Saving a pdf on a new PrintIssue fills in the model and its Issue."""
    # Store the pdf and the cover image in a temporary directory.
    settings.MEDIA_ROOT = tempdir.name
    settings.DEFAULT_FILE_STORAGE = (
        'django.core.files.storage.FileSystemStorage'
    )

    print_issue = PrintIssue()
    pdf_content = get_contentfile(fixture_pdf)
    pdf_filename = os.path.basename(fixture_pdf)

    # Saving the file field also saves the model instance.
    print_issue.pdf.save(pdf_filename, pdf_content)

    # A publication date can be determined for the print issue.
    assert print_issue.get_publication_date() > date(1900, 1, 1)
    assert 'fixture_universitas' in str(print_issue)
    assert print_issue.pages == 4

    # An Issue has been created, with a publication date inferred from the
    # pdf file content or the file timestamp.
    related_issue = print_issue.issue
    assert isinstance(related_issue, Issue)
    assert related_issue.publication_date > date(2000, 9, 9)

    # Every field is populated, so validation passes.
    print_issue.full_clean()

    # Generate the cover page image.
    print_issue.get_cover_page()
    cover_path = print_issue.cover_page.path
    assert cover_path.endswith('/covers/fixture_universitas.jpg')
def test_create_printissue(fixture_pdf, settings, tempdir):
    """Saving a pdf on a new PrintIssue fills in the model and its Issue."""
    # Store the pdf and the cover image in a temporary directory.
    settings.MEDIA_ROOT = tempdir.name
    settings.DEFAULT_FILE_STORAGE = (
        'django.core.files.storage.FileSystemStorage'
    )

    print_issue = PrintIssue()
    pdf_content = get_contentfile(fixture_pdf)
    pdf_filename = os.path.basename(fixture_pdf)

    # Saving the file field also saves the model instance.
    print_issue.pdf.save(pdf_filename, pdf_content)

    # A publication date can be determined for the print issue.
    assert print_issue.get_publication_date() > date(1900, 1, 1)
    assert 'fixture_universitas' in str(print_issue)
    assert print_issue.pages == 4

    # An Issue has been created, with a publication date inferred from the
    # pdf file content or the file timestamp.
    related_issue = print_issue.issue
    assert isinstance(related_issue, Issue)
    assert related_issue.publication_date > date(2000, 9, 9)

    # Every field is populated, so validation passes.
    print_issue.full_clean()

    # Generate the thumbnail of the cover page.
    print_issue.get_thumbnail()
    cover_path = print_issue.cover_page.path
    assert cover_path.endswith('/covers/fixture_universitas.jpg')