Esempio n. 1
0
def create_print_issue_pdf():
    """Create or update the pdf of every edition of the current issue.

    For each ``(suffix, glob pattern)`` pair in ``editions`` a pdf bundle is
    written to a temporary file by ``create_web_bundle`` and then stored on
    the ``PrintIssue`` whose pdf name ends with the output name; a new
    ``PrintIssue`` is created when none matches.  An edition is skipped when
    ``create_web_bundle`` raises ``RuntimeWarning``.

    Returns:
        list: names of the pdf files that were saved, in edition order.
    """
    issue = current_issue()
    editions = [('', PAGES_GLOB), ('_mag', MAG_PAGES_GLOB)]
    results = []
    for suffix, globpattern in editions:
        pdf_name = OUTPUT_PDF_NAME.format(issue=issue, suffix=suffix)
        issue_name = '{issue.number}/{issue.date.year}{suffix}'.format(
            suffix=suffix, issue=issue)
        logger.info('Creating pdf: {}'.format(issue_name))

        # The context manager closes (and thereby deletes) the temporary
        # file as soon as this edition is done -- also on `continue` and
        # when an unexpected exception propagates -- instead of leaving
        # the cleanup to garbage collection.
        with tempfile.NamedTemporaryFile(suffix='.pdf') as tmp_bundle_file:
            try:
                create_web_bundle(filename=tmp_bundle_file.name,
                                  globpattern=globpattern)
            except RuntimeWarning as warning:
                logger.info(str(warning))
                continue
            # The bundle is written by file name, so read it back through
            # a fresh handle on the same path.
            with open(tmp_bundle_file.name, 'rb') as src:
                pdf_content = ContentFile(src.read())

        try:
            print_issue = PrintIssue.objects.get(pdf__endswith=pdf_name)
        except PrintIssue.DoesNotExist:
            print_issue = PrintIssue()
        # save=False: the model is saved explicitly right below.
        print_issue.pdf.save(pdf_name, pdf_content, save=False)
        print_issue.save()
        logger.info('New bundle file: {}'.format(pdf_name))
        results.append(pdf_name)
    return results
Esempio n. 2
0
def create_print_issue_pdf(issue, **kwargs):
    """Create or update the pdf of every edition of the given issue.

    For each ``(suffix, file glob)`` pair in ``editions`` a pdf bundle is
    written to a temporary file by ``create_web_bundle`` and then stored on
    the ``PrintIssue`` whose pdf name contains the output name; a new
    ``PrintIssue`` is created when none matches.  An edition is skipped when
    ``create_web_bundle`` raises ``RuntimeWarning``.

    Args:
        issue: an ``Issue`` instance, or the integer id of one, in which
            case it is loaded with ``Issue.objects.get``.
        **kwargs: passed through unchanged to ``create_web_bundle``.

    Returns:
        list: names of the pdf files that were saved, in edition order.
    """
    if isinstance(issue, int):
        issue = Issue.objects.get(id=issue)

    editions = [('', PAGES_GLOB), ('_mag', MAG_PAGES_GLOB)]
    results = []
    for suffix, fileglob in editions:
        pdf_name = OUTPUT_PDF_NAME.format(issue=issue, suffix=suffix)
        logger.info('Creating pdf: {}'.format(pdf_name))

        # The context manager closes (and thereby deletes) the temporary
        # file as soon as this edition is done -- also on `continue` and
        # when an unexpected exception propagates -- instead of leaving
        # the cleanup to garbage collection.
        with tempfile.NamedTemporaryFile(suffix='.pdf') as tmp_bundle_file:
            try:
                create_web_bundle(
                    issue=issue,
                    filename=tmp_bundle_file.name,
                    fileglob=fileglob,
                    **kwargs,
                )
            except RuntimeWarning as warning:
                logger.info(str(warning))
                continue
            # The bundle is written by file name, so read it back through
            # a fresh handle on the same path.
            with open(tmp_bundle_file.name, 'rb') as src:
                pdf_content = ContentFile(src.read())

        try:
            print_issue = PrintIssue.objects.get(pdf__contains=pdf_name)
        except PrintIssue.DoesNotExist:
            print_issue = PrintIssue()
        # save=False: the model is saved explicitly once the issue is set.
        print_issue.pdf.save(pdf_name, pdf_content, save=False)
        print_issue.issue = issue
        print_issue.save()
        logger.info('New bundle file: {}'.format(pdf_name))
        results.append(pdf_name)
    return results
Esempio n. 3
0
def create_print_issue_pdf():
    """Build the pdf bundles for the current issue and store them.

    Every edition listed in ``editions`` is bundled into a temporary pdf by
    ``create_web_bundle``.  The result is saved on the ``PrintIssue`` whose
    pdf name ends with the output name, or on a newly created
    ``PrintIssue`` if there is no such record.  When ``create_web_bundle``
    raises ``RuntimeWarning`` the warning is logged and that edition is
    skipped.

    Returns:
        list: names of the pdf files that were saved, in edition order.
    """
    issue = current_issue()
    editions = [('', PAGES_GLOB), ('_mag', MAG_PAGES_GLOB)]
    results = []
    for suffix, globpattern in editions:
        pdf_name = OUTPUT_PDF_NAME.format(issue=issue, suffix=suffix)
        issue_name = '{issue.number}/{issue.date.year}{suffix}'.format(
            suffix=suffix, issue=issue)
        logger.info('Creating pdf: {}'.format(issue_name))

        # Use the temporary file as a context manager so that it is closed
        # and deleted deterministically on every exit path of this
        # iteration, rather than whenever the object is garbage collected.
        with tempfile.NamedTemporaryFile(suffix='.pdf') as tmp_bundle_file:
            try:
                create_web_bundle(
                    filename=tmp_bundle_file.name, globpattern=globpattern)
            except RuntimeWarning as warning:
                logger.info(str(warning))
                continue
            # Re-open by name: the bundle was written to the path, not to
            # the handle held by `tmp_bundle_file`.
            with open(tmp_bundle_file.name, 'rb') as src:
                pdf_content = ContentFile(src.read())

        try:
            print_issue = PrintIssue.objects.get(pdf__endswith=pdf_name)
        except PrintIssue.DoesNotExist:
            print_issue = PrintIssue()
        # save=False: the model is saved explicitly right below.
        print_issue.pdf.save(pdf_name, pdf_content, save=False)
        print_issue.save()
        logger.info('New bundle file: {}'.format(pdf_name))
        results.append(pdf_name)
    return results
def _get_content(print_issue: PrintIssue, first_page=2, last_page=999) -> str:
    """Return the text of a pdf issue, or '' when it cannot be read.

    Args:
        print_issue: object whose ``get_page_text_content`` is called.
        first_page: first argument passed to ``get_page_text_content``.
        last_page: second argument passed to ``get_page_text_content``.

    Returns:
        The value returned by ``get_page_text_content``, or the empty
        string if that call raised any ``Exception``.
    """
    try:
        return print_issue.get_page_text_content(first_page, last_page)
    except Exception:
        # Deliberately best effort: record the traceback and fall back to
        # an empty text instead of propagating the error to the caller.
        logger.exception('Error reading pdf:')
        return ''
Esempio n. 5
0
def bundle_pdf(for_issue, logger):
    """Merge staged pdf pages into one file per edition and save it.

    For each edition (code 1 with no suffix, code 2 with suffix ``_mag``)
    the files returned by ``get_staging_pdf_files`` are merged with the
    ``PDF_MERGE`` command into a file in ``PDF_FOLDER``.  The merged pdf is
    saved on the ``PrintIssue`` whose pdf name ends with the target
    filename, or on a newly created ``PrintIssue`` when none matches.

    An edition is skipped (with a log message) when no files are staged,
    when the number of files is not a multiple of four, or when the merge
    command exits with a non-zero status.

    Args:
        for_issue: issue object used to format ``FILENAME_PATTERN``.
        logger: object providing ``info`` and ``debug`` logging methods.
    """
    for code, suffix in (1, ''), (2, '_mag'):
        filename = FILENAME_PATTERN.format(
            issue=for_issue,
            suffix=suffix,
        )
        files = get_staging_pdf_files(code)

        if not files:
            logger.info('no files found, %s' % code)
            continue

        if len(files) % 4:
            logger.info('Incorrect number of pages (%d), %s' %
                        (len(files), code))
            continue

        pdf_path = os.path.join(PDF_FOLDER, filename)

        args = [PDF_MERGE, pdf_path] + files
        logger.debug('\n'.join(args))
        return_code = subprocess.call(args)
        if return_code:
            # A failed merge leaves either no file or an outdated one at
            # pdf_path; skip the edition rather than saving that.
            logger.info('pdf merge failed with exit status %d, %s' %
                        (return_code, code))
            continue

        try:
            print_issue = PrintIssue.objects.get(pdf__endswith=filename)
        except PrintIssue.DoesNotExist:
            print_issue = PrintIssue()

        with open(pdf_path, 'rb') as src:
            content = ContentFile(src.read())
        print_issue.pdf.save(filename, content)
Esempio n. 6
0
def test_create_printissue(fixture_pdf, settings, tempdir):
    """Saving the fixture pdf on a new PrintIssue populates the model."""
    # Store the pdf and the generated cover page in a throwaway directory
    settings.MEDIA_ROOT = tempdir.name
    settings.DEFAULT_FILE_STORAGE = (
        'django.core.files.storage.FileSystemStorage'
    )

    print_issue = PrintIssue()
    pdf_basename = os.path.basename(fixture_pdf)
    pdf_content = get_contentfile(fixture_pdf)

    # Saving the file field stores the fixture content and the model too
    print_issue.pdf.save(pdf_basename, pdf_content)

    # A publication date can be determined
    assert print_issue.get_publication_date() > date(1900, 1, 1)

    assert 'fixture_universitas' in str(print_issue)
    assert print_issue.pages == 4

    # An Issue must have been created, with a publication date inferred
    # from the pdf file content or the file timestamp
    related_issue = print_issue.issue
    assert isinstance(related_issue, Issue)
    assert related_issue.publication_date > date(2000, 9, 9)

    # Validation passes only if all fields are populated
    print_issue.full_clean()

    # Generate the cover page thumbnail and check where it was written
    print_issue.get_cover_page()
    expected_suffix = '/covers/fixture_universitas.jpg'
    assert print_issue.cover_page.path.endswith(expected_suffix)
Esempio n. 7
0
def test_create_printissue(fixture_pdf, settings, tempdir):
    """A PrintIssue built from the fixture pdf gets all of its fields set."""
    # Point media storage at the temporary directory so that the pdf and
    # the front page image do not end up in the real media folder
    settings.DEFAULT_FILE_STORAGE = (
        'django.core.files.storage.FileSystemStorage'
    )
    settings.MEDIA_ROOT = tempdir.name

    new_print_issue = PrintIssue()
    fixture_content = get_contentfile(fixture_pdf)
    fixture_name = os.path.basename(fixture_pdf)

    # Stores the fixture pdf content and saves the model as well
    new_print_issue.pdf.save(fixture_name, fixture_content)

    # The publication date lookup works
    pub_date = new_print_issue.get_publication_date()
    assert pub_date > date(1900, 1, 1)

    assert new_print_issue.pages == 4
    assert 'fixture_universitas' in str(new_print_issue)

    # An Issue has been created; its publication date was inferred from
    # the pdf file content or the file timestamp
    created_issue = new_print_issue.issue
    assert isinstance(created_issue, Issue)
    assert created_issue.publication_date > date(2000, 9, 9)

    # Every field is populated, so model validation succeeds
    new_print_issue.full_clean()

    # Create the thumbnail of the cover page
    new_print_issue.get_thumbnail()
    cover_path = new_print_issue.cover_page.path
    assert cover_path.endswith('/covers/fixture_universitas.jpg')