def process_pdf(parser, pdfurl, errors):
    # Stop cleanly (reporting the errors collected so far) if the scraper
    # is about to exhaust its CPU allowance.
    postlistelib.exit_if_no_cpu_left(0, out_of_cpu, errors)
    try:
        parser.fetch_and_preprocess(pdfurl)
#    except ValueError as e:
#        errors.append(e)
    except IndexError as e:
        # Append to the caller's list so failures accumulate across PDFs.
        errors.append(e)
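# A self-contained sketch (all names below are invented for illustration)
# of the error-accumulation pattern process_pdf() relies on: keep going
# past per-document failures, collect the exceptions, report at the end.

def fetch_and_preprocess(url):
    # Stand-in for parser.fetch_and_preprocess(); fails for one URL so
    # the loop below has something to collect.
    if "bad" in url:
        raise ValueError("could not parse %s" % url)

def report_errors(errors):
    # Illustrative reporter: print everything that was collected.
    for e in errors:
        print("ERROR: %s" % e)

errors = []
for url in ["http://example.com/a.pdf", "http://example.com/bad.pdf"]:
    try:
        fetch_and_preprocess(url)
    except (ValueError, IndexError) as e:
        errors.append(e)

report_errors(errors)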
Example #3
# Debug aids, disabled by default: uncomment / flip to True to test a
# single journal PDF instead of scraping the full list.
#parser.debug = True

if False:
    pdfurl = "http://www.stortinget.no/Global/pdf/postjournal/pj-2010-06-04-05.pdf"
    parse_pdf(pdfurl)
    exit(0)

pdfurls = []
add_pdf_lists(parser, pdfurls)

# Fetch all journal PDFs
errors = []
for pdfurl in pdfurls:
    postlistelib.exit_if_no_cpu_left(0, callback=no_cpu_left, arg=errors)
    try:
        parser.fetch_and_preprocess(pdfurl)
    except (ValueError, IndexError) as e:
        # Collect per-document failures and keep going.
        errors.append(e)

# Extract journal entries from the fetched pages.
try:
    parser.process_pages()
except (ValueError, IndexError) as e:
    errors.append(e)

report_errors(errors)

# Based on the scraper advanced-scraping-pdf
# See also
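
# The exit_if_no_cpu_left() calls above are postlistelib's guard against
# the scraping platform's CPU quota: flush what was collected, then stop
# cleanly instead of being killed mid-run. A rough, self-contained sketch
# of such a guard follows; the budget constant and callback protocol are
# invented here, and the real semantics belong to postlistelib.

import resource
import sys

CPU_BUDGET_SECONDS = 80  # invented budget for the sketch

def exit_if_no_cpu_left(margin, callback=None, arg=None):
    # Unix-only: ask the kernel how much CPU time this process has used.
    usage = resource.getrusage(resource.RUSAGE_SELF)
    used = usage.ru_utime + usage.ru_stime
    if used + margin >= CPU_BUDGET_SECONDS:
        if callback is not None:
            callback(arg)  # e.g. report the errors collected so far
        sys.exit(0)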