Python count_pages Beispiele

Programmiersprache: Python

Namespace / Paketname: util

Methode / Funktion: count_pages

Beispiele auf hotexamples.com: 8

Python count_pages – 8 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Funktion util.count_pages, die aus Open-Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Beispiel #1

Datei anzeigen

Datei: tasks.py Projekt: ddohler/webocr

def document_analysis(docid):
    """Analyse a document and queue the matching page-extraction task.

    Determines the file format and page count, and for PDFs also the image
    count and whether text is present. Format and page count are stored on
    the document, after which either ``pages_from_images`` or
    ``pages_from_rasterize`` is dispatched for it.

    Args:
        docid: Identifier handed to ``util.is_valid_doc`` and forwarded
            unchanged to the queued task.
    """
    #TODO: Check for multiple objects?
    doc = util.is_valid_doc(docid)

    doc.file_format = util.determine_format(doc)

    ### Counting pages and repairing damaged documents ###
    num_pages = util.count_pages(doc)
    # TODO: Repair damaged PDFs when counting fails (the disabled draft
    # checked for num_pages == -1). The drafted
    # ``pdftk <file> output <file>`` call doesn't quite work: it needs to
    # write to a copy first, or the object's file field must be updated.
    # After a repair, count again; if the page count is still undetectable,
    # report that the document cannot be processed.

    if doc.file_format == 'pdf':
        # Counting the number of pages may fail; PyPdf doesn't handle
        # corrupt PDFs well.
        num_imgs = util.count_images(doc)
        has_text = util.detect_text(doc)
    else:
        num_imgs = num_pages  # For TIFFs num_pages might be > 1
        has_text = False

    # Persist the analysis results BEFORE queueing any task. The task only
    # receives the id, so it presumably re-loads the document; saving
    # afterwards would let it see stale fields, and this late save could
    # overwrite whatever the task stored in the meantime.
    doc.num_pages = num_pages
    doc.save()

    # Decide what to do.
    # The explicit comparison is kept so that a non-boolean result such as
    # None still falls through to rasterization.
    if has_text == False and num_imgs == num_pages:  # noqa: E712
        # Simple case: no text and one image per page.
        pages_from_images.delay(docid)
    else:
        # Everything else is rasterized for now:
        #   - text and no images (nothing to OCR): output page images
        #   - mixed image / text: rasterize pages, then OCR
        #   - any other combination: fall back to rasterize and OCR
        pages_from_rasterize.delay(docid)

Beispiel #2

Datei anzeigen

def document_analysis(docid):
    """Inspect a document and dispatch the appropriate page-extraction task.

    The file format and page count are determined first; PDFs are
    additionally checked for embedded images and text. The format and page
    count are saved on the document, and then ``pages_from_images`` or
    ``pages_from_rasterize`` is queued with the same id.

    Args:
        docid: Identifier passed to ``util.is_valid_doc`` and on to the
            queued task.
    """
    #TODO: Check for multiple objects?
    doc = util.is_valid_doc(docid)

    doc.file_format = util.determine_format(doc)

    ### Counting pages and repairing damaged documents ###
    num_pages = util.count_pages(doc)
    # TODO: Try to repair damaged PDFs when the count fails (the disabled
    # draft tested num_pages == -1). The drafted repair command
    # (``pdftk <file> output <file>``) doesn't quite work; it needs to make
    # a copy first or update the object's field. Then try counting again.
    # If it's still undetectable there's not much more we can do: report
    # that the document cannot be processed.

    if doc.file_format == 'pdf':
        # Counting the number of pages may fail; PyPdf doesn't handle
        # corrupt PDFs well.
        num_imgs = util.count_images(doc)
        has_text = util.detect_text(doc)
    else:
        num_imgs = num_pages  # For TIFFs num_pages might be > 1
        has_text = False

    # Save before dispatching: the queued task is given only the id and
    # presumably loads the document itself. Saving after ``delay`` risks the
    # task reading outdated fields, and this save clobbering the task's own
    # updates.
    doc.num_pages = num_pages
    doc.save()

    # Decide what to do.
    # ``== False`` is kept on purpose: a non-boolean value such as None must
    # not be treated as "no text" and still ends up in the fallback branch.
    image_only = has_text == False and num_imgs == num_pages  # noqa: E712
    if image_only:
        # Simple case
        pages_from_images.delay(docid)
    else:
        # Text without images (nothing to OCR), mixed image / text, and the
        # generic fallback are all handled by rasterizing for now.
        pages_from_rasterize.delay(docid)

Beispiel #3

Datei anzeigen

Datei: comment.py Projekt: synee/abillist

def count_pages():
    """Return ``util.count_pages`` applied to the number of ``Comment`` entities.

    NOTE(review): presumably this is the number of pages needed to list all
    comments -- confirm against ``util.count_pages``.
    """
    total_comments = db.Query(Comment).count()
    return util.count_pages(total_comments)

Beispiel #4

Datei anzeigen

Datei: comment.py Projekt: neuront/nijipress

def count_pages():
    """Count all ``Comment`` entities and pass the total to ``util.count_pages``.

    Returns:
        Whatever ``util.count_pages`` returns for that total (presumably a
        page count -- verify against the helper).
    """
    comment_query = db.Query(Comment)
    return util.count_pages(comment_query.count())

Beispiel #5

Datei anzeigen

Datei: post.py Projekt: neuront/nijipress

def count_pages_by_tag(t):
    """Return ``util.count_pages`` for the ``TagPostR`` entities matching a tag.

    Args:
        t: Value the ``tag`` property is compared against in the query filter.
    """
    tagged = db.Query(tag.TagPostR).filter('tag =', t)
    return util.count_pages(tagged.count())

Beispiel #6

Datei anzeigen

Datei: post.py Projekt: neuront/nijipress

def count_pages():
    """Return ``util.count_pages`` applied to the number of ``Post`` entities.

    NOTE(review): presumably the number of listing pages for all posts --
    confirm against ``util.count_pages``.
    """
    total_posts = db.Query(Post).count()
    return util.count_pages(total_posts)

Beispiel #7

Datei anzeigen

Datei: post.py Projekt: neuront/nijinote

def count_pages_by_tag(t):
    """Count the ``TagPostR`` entities for tag ``t`` and feed the total to ``util.count_pages``.

    Args:
        t: Tag value used in the ``'tag ='`` filter.

    Returns:
        The result of ``util.count_pages`` for the filtered count.
    """
    relation_query = db.Query(tag.TagPostR)
    matching_count = relation_query.filter('tag =', t).count()
    return util.count_pages(matching_count)

Beispiel #8

Datei anzeigen

Datei: post.py Projekt: neuront/nijinote

def count_pages():
    """Count all ``Post`` entities and pass the total to ``util.count_pages``.

    Returns:
        Whatever ``util.count_pages`` returns for that total (presumably a
        page count -- verify against the helper).
    """
    post_query = db.Query(Post)
    return util.count_pages(post_query.count())