コード例 #1
0
ファイル: test_parsers.py プロジェクト: mstarzyk/pikepdf
def test_parser_exception(resources):
    """Parsing the first page's contents with ``ExceptionParser`` must raise ``ValueError``."""
    # Use the context manager so the PDF is closed even if the check fails.
    with Pdf.open(resources / 'graph.pdf') as pdf:
        stream = pdf.pages[0]['/Contents']
        with pytest.raises(ValueError):
            Object._parse_stream(stream, ExceptionParser())
コード例 #2
0
ファイル: test_foreign.py プロジェクト: sahwar/pikepdf
def outlines(resources):
    """Open ``outlines.pdf`` from *resources* and return the ``Pdf`` (left open)."""
    pdf_path = resources / 'outlines.pdf'
    return Pdf.open(pdf_path)
コード例 #3
0
ファイル: test_parsers.py プロジェクト: victor8733/pikepdf
def test_has_text(resources, test_file, expected):
    """Every page of *test_file* must report ``has_text()`` equal to *expected*."""
    # Use the context manager so the PDF is closed even if an assertion fails.
    with Pdf.open(resources / test_file) as pdf:
        for p in pdf.pages:
            page = Page(p)
            assert page.has_text() == expected
コード例 #4
0
ファイル: test_sanity.py プロジェクト: xjqx05/pikepdf
def test_open_pdf_password(resources):
    """An encrypted PDF opened with a password must expose a page count of 1."""
    # Use the context manager so the PDF is closed even if the assertion fails.
    with Pdf.open(resources / 'graph-encrypted.pdf', password='******') as pdf:
        assert pdf.root['/Pages']['/Count'] == 1
コード例 #5
0
def sandwich(resources):
    """Yield ``sandwich.pdf`` opened as a ``Pdf``; it is closed when the generator resumes.

    Per the original note, the file has XMP, docinfo,
    <?adobe-xap-filters esc="CRLF"?>, and shorthand attribute XMP.
    """
    pdf_path = resources / 'sandwich.pdf'
    with Pdf.open(pdf_path) as opened:
        yield opened
コード例 #6
0
 def test_open_pdf_password_encoding(self, resources):
     """A bytes password of ``b'\\x01\\xfe'`` must be rejected with ``PasswordError``."""
     encrypted = resources / 'graph-encrypted.pdf'
     bad_password = b'\x01\xfe'
     with pytest.raises(PasswordError):
         Pdf.open(encrypted, password=bad_password)
コード例 #7
0
 def test_stream(self, resources):
     """Open a PDF from a binary stream and check the page count after the stream closes."""
     source = resources / 'pal-1bit-trivial.pdf'
     with source.open('rb') as fh:
         pdf = Pdf.open(fh)
     # The assertion intentionally runs after the stream's ``with`` block ends.
     assert pdf.root.Pages.Count == 1
コード例 #8
0
def test_file_descriptor(resources):
    """Passing an integer file descriptor to ``Pdf.open`` must raise ``TypeError``."""
    pdf_path = resources / 'pal-1bit-trivial.pdf'
    with pdf_path.open('rb') as handle, pytest.raises(TypeError):
        Pdf.open(handle.fileno())
コード例 #9
0
 def test_some_permissions_missing(self, resources):
     """``print_highres`` and ``modify_annotation`` must both be ``False`` for this file.

     The file is opened with ``'owner'`` as the second positional argument.
     """
     # Use the context manager so the PDF is closed even if the assertion fails.
     with Pdf.open(resources / 'graph-encrypted.pdf', 'owner') as pdf:
         assert pdf.allow.print_highres == pdf.allow.modify_annotation == False
コード例 #10
0
def test_attr_access(resources):
    """Attribute-style access ``Root.Pages.Count`` must yield 1 for ``graph.pdf``."""
    graph_path = resources / 'graph.pdf'
    with Pdf.open(graph_path) as opened:
        page_count = int(opened.Root.Pages.Count)
        assert page_count == 1
コード例 #11
0
 def test_read_not_readable_file(self, outdir):
     """Opening a PDF from a write-only file object must raise ``ValueError`` ('not readable')."""
     # Manage the write-only handle with ``with`` so it is always closed;
     # previously it stayed open after the test finished.
     with (Path(outdir) / 'writeme.pdf').open('wb') as writable:
         with pytest.raises(ValueError, match=r'not readable'):
             Pdf.open(writable)
コード例 #12
0
ファイル: compile.py プロジェクト: 61a-ide/cs61a-apps
def compile(
    exam,
    json,
    md,
    seed,
    subtitle,
    with_solutions,
    exam_type,
    semester,
    json_out,
    merged_md,
    draft,
    out,
):
    """
    Compile one PDF or JSON (from Markdown), unencrypted.
    The exam may be deployed or local (in Markdown or JSON).
    If a seed is specified, it will scramble the exam.
    """
    # NOTE: the docstring above is left untouched on purpose; it may be
    # surfaced verbatim as command help text (decorators are not visible here).

    # Fall back to the current directory when no output directory is given.
    if not out:
        out = ""

    pathlib.Path(out).mkdir(parents=True, exist_ok=True)

    # Source priority: explicit JSON, then Markdown, otherwise fetch by name.
    if json:
        print("Loading exam...")
        exam_data = load(json)
    elif md:
        exam_text_data = md.read()
        if merged_md:
            # Only resolve imports and write the merged Markdown; no exam
            # is compiled in this mode.
            buff = LineBuffer(exam_text_data)
            handle_imports(buff, path=os.path.dirname(md.name))
            merged_md.write("\n".join(buff.lines))
            return
        print("Compiling exam...")
        exam_data = convert(exam_text_data, path=os.path.dirname(md.name), draft=draft)
    else:
        print("Fetching exam...")
        exam_data = get_exam(exam=exam)

    if seed:
        print("Scrambling exam...")
        exam_data = scramble(seed, exam_data, keep_data=with_solutions)

    def remove_solutions_from_groups(groups):
        """Drop the "solution" key from each entry, recursing into nested groups."""
        for group in groups:
            group.pop("solution", None)
            if group.get("type") == "group":
                remove_solutions_from_groups(group.get("elements", []))

    # Solutions are stripped here only for unscrambled exams; with a seed,
    # `scramble` is given `keep_data=with_solutions` instead.
    if not seed and not with_solutions:
        print("Removing solutions...")
        groups = exam_data.get("groups", [])
        remove_solutions_from_groups(groups)

    if json_out:
        print("Dumping json...")
        dump(exam_data, json_out, indent=4, sort_keys=True)
        return

    print("Rendering exam...")
    # NOTE(review): `exam` is dereferenced here even when the data came from
    # `json` or `md` -- confirm callers always supply it on this path.
    settings = {
        "coursecode": prettify(exam.split("-")[0]),
        "description": subtitle,
        "examtype": exam_type,
        "semester": semester,
    }
    if seed:
        settings["emailaddress"] = sanitize_email(seed)
    with render_latex(exam_data, settings) as pdf_bytes:
        # Open through the context manager so the Pdf is closed even when
        # save() raises; a distinct name avoids rebinding the rendered bytes.
        with Pdf.open(BytesIO(pdf_bytes)) as pdf:
            pdf.save(os.path.join(out, exam + ".pdf"))
コード例 #13
0
def test_pypdf2_issue_361(private):
    """Opening the gzip-wrapped sample must raise ``PdfError`` matching 'trailer'."""
    archive = str(private / 'pypdf2_issue_361.pdf.gz')
    with gzip.open(archive, 'rb') as gz, pytest.raises(PdfError, match=r'trailer'):
        Pdf.open(gz)
コード例 #14
0
import pandas as pd
import numpy as np
import PyPDF2
import textract
import re
from pikepdf import Pdf

filename = 'cba_2020_annual_report.pdf'
pdfFileObj = open(filename, 'rb')               # binary handle the PyPDF2 reader pulls from
pdfReader = PyPDF2.PdfFileReader(pdfFileObj)   # reader object used to pull pages out below
if pdfReader.isEncrypted:
    # Open the file with pikepdf using the password, save a copy under a
    # "new"-prefixed name, and point the PyPDF2 reader at that copy instead.
    decrypted_name = "new" + filename
    with Pdf.open(filename, password="******") as pdf:
        pdf.save(decrypted_name)
    # Close the handle on the encrypted original before rebinding the name;
    # previously it was leaked when `pdfFileObj` was reassigned.
    pdfFileObj.close()
    filename = decrypted_name
    pdfFileObj = open(filename, 'rb')
    pdfReader = PyPDF2.PdfFileReader(pdfFileObj)

# The page range is hard-coded rather than taken from pdfReader.numPages:
# only page indices 55 through 57 are read.
num_pages = 58
print(pdfReader.numPages)

count = 55
text = ""

while count < num_pages:                       # read each page in the range
    pageObj = pdfReader.getPage(count)
    count += 1
    text += pageObj.extractText()
    
# The if statement below checks whether the library above returned any words; this is needed because PyPDF2 cannot read scanned files.
コード例 #15
0
def test_empty(outdir):
    """Opening ``empty.pdf`` right after ``touch()`` must raise ``PdfError``."""
    empty_pdf = outdir / 'empty.pdf'
    empty_pdf.touch()  # make sure the file exists before opening it
    with pytest.raises(PdfError):
        Pdf.open(empty_pdf)
コード例 #16
0
def graph(resources):
    """Open ``graph.pdf`` from *resources* and return the ``Pdf`` (left open).

    Per the original note: has XMP and docinfo, all standard format XMP.
    """
    pdf_path = resources / 'graph.pdf'
    return Pdf.open(pdf_path)
コード例 #17
0
 def test_open_pdf_wrong_password(self, resources):
     """Opening the encrypted sample with a wrong password must raise ``PasswordError``.

     Per the original note, the correct passwords are "owner" and "user".
     """
     encrypted = resources / 'graph-encrypted.pdf'
     with pytest.raises(PasswordError):
         Pdf.open(encrypted, password='******')
コード例 #18
0
def trivial(resources):
    """Open ``pal-1bit-trivial.pdf`` from *resources* and return the ``Pdf`` (left open).

    Per the original note: has no XMP or docinfo.
    """
    pdf_path = resources / 'pal-1bit-trivial.pdf'
    return Pdf.open(pdf_path)
コード例 #19
0
 def test_open_pdf_no_password_but_needed(self, resources):
     """Opening the encrypted sample without any password must raise ``PasswordError``."""
     encrypted = resources / 'graph-encrypted.pdf'
     with pytest.raises(PasswordError):
         Pdf.open(encrypted)
コード例 #20
0
def invalid_creationdate(resources):
    """Open ``invalid_creationdate.pdf`` from *resources* and return the ``Pdf`` (left open).

    Per the original note: has nuls in docinfo, old PDF.
    """
    pdf_path = resources / 'invalid_creationdate.pdf'
    return Pdf.open(pdf_path)
コード例 #21
0
 def test_no_text_stream(self, resources):
     """A file opened in text mode (``'r'``) must be rejected with ``TypeError``."""
     # ``pytest.raises`` is entered first, so it still wraps the file open
     # exactly as the nested form did.
     with pytest.raises(TypeError), (resources / 'pal-1bit-trivial.pdf').open('r') as text_stream:
         Pdf.open(text_stream)
コード例 #22
0
 def test_memory(self, resources):
     """Passing the file's raw bytes to ``Pdf.open`` must raise an exception."""
     raw_bytes = (resources / 'pal-1bit-trivial.pdf').read_bytes()
     with pytest.raises(Exception):
         Pdf.open(raw_bytes)
コード例 #23
0
ファイル: test_sanity.py プロジェクト: xjqx05/pikepdf
def test_attr_access(resources):
    """Attribute-style access ``root.Pages.Count`` must yield 1 for ``graph.pdf``."""
    # Use the context manager so the PDF is closed even if the assertion fails.
    with Pdf.open(resources / 'graph.pdf') as pdf:
        assert int(pdf.root.Pages.Count) == 1
コード例 #24
0
def trivial(resources):
    """Open ``pal-1bit-trivial.pdf`` from *resources* and return the ``Pdf`` (left open)."""
    pdf_path = resources / 'pal-1bit-trivial.pdf'
    return Pdf.open(pdf_path)
コード例 #25
0
def test_overwrite_input(resources, outdir):
    """Saving a ``Pdf`` over the file it was opened from must raise ``ValueError``."""
    working_copy = outdir / 'sandwich.pdf'
    copy(resources / 'sandwich.pdf', working_copy)
    with Pdf.open(working_copy) as opened, pytest.raises(
        ValueError, match=r'overwrite input file'
    ):
        opened.save(working_copy)
コード例 #26
0
def test_non_filename():
    """Passing the integer 42 to ``Pdf.open`` must raise ``TypeError``."""
    not_a_filename = 42
    with pytest.raises(TypeError):
        Pdf.open(not_a_filename)
コード例 #27
0
ファイル: test_foreign.py プロジェクト: sahwar/pikepdf
def vera(resources):
    """Open the veraPDF test-suite sample from *resources* and return the ``Pdf`` (left open).

    Per the original note: has XMP but no docinfo.
    """
    pdf_path = resources / 'veraPDF test suite 6-2-10-t02-pass-a.pdf'
    return Pdf.open(pdf_path)
コード例 #28
0
def test_not_existing_file():
    """Opening the path ``'does_not_exist.pdf'`` must raise ``FileNotFoundError``."""
    missing_path = 'does_not_exist.pdf'
    with pytest.raises(FileNotFoundError):
        Pdf.open(missing_path)
コード例 #29
0
ファイル: test_parsers.py プロジェクト: victor8733/pikepdf
def test_open_pdf(resources):
    """Open ``graph.pdf`` and run ``PrintParser`` over its first page."""
    # Use the context manager so the PDF is closed even if parsing raises.
    with Pdf.open(resources / 'graph.pdf') as pdf:
        page = pdf.pages[0]
        Object._parse_stream(page, PrintParser())
コード例 #30
0
def enron1(resources):
    """Open ``enron1_gs.pdf`` from *resources* and return the ``Pdf`` (left open).

    Per the original note: has nuls in docinfo, old PDF.
    """
    pdf_path = resources / 'enron1_gs.pdf'
    return Pdf.open(pdf_path)