Example #1
def process_pdf(parser, pdfurl, errors):
    # Bail out if the run has no CPU time left; out_of_cpu is a callback
    # defined elsewhere in the scraper.
    postlistelib.exit_if_no_cpu_left(0, out_of_cpu, errors)
    try:
        # Download the PDF, hand it to the parser, then drop the content
        # to free memory.
        pdfcontent = scraperwiki.scrape(pdfurl)
        parser.preprocess(pdfurl, pdfcontent)
        pdfcontent = None
    except ValueError as e:
        errors.append(e)
Example #2
def process_pdf(parser, pdfurl, errors):
    # Bail out if the run has no CPU time left; out_of_cpu is a callback
    # defined elsewhere in the scraper.
    postlistelib.exit_if_no_cpu_left(0, out_of_cpu, errors)
    try:
        # Download the PDF, hand it to the parser, then drop the content
        # to free memory.
        pdfcontent = scraperwiki.scrape(pdfurl)
        parser.preprocess(pdfurl, pdfcontent)
        pdfcontent = None
    except ValueError as e:
        errors.append(e)
Example #3
def process_pdf(parser, pdfurl, errors):
    postlistelib.exit_if_no_cpu_left(0, out_of_cpu, errors)
    try:
        pdfcontent = scraperwiki.scrape(pdfurl)
        parser.preprocess(pdfurl, pdfcontent)
        pdfcontent = None
    except ValueError as e:  # Some PDFs cannot be parsed; this should be investigated.
        print("PDF format problem")
        errors.append(e)
Example #4
def process_pdf(parser, pdfurl, errors):
    postlistelib.exit_if_no_cpu_left(0, out_of_cpu, errors)
    try:
        pdfcontent = scraperwiki.scrape(pdfurl)
        parser.preprocess(pdfurl, pdfcontent)
        pdfcontent = None
    except ValueError as e:  # Some PDFs cannot be parsed; this should be investigated.
        print("PDF format problem")
        errors.append(e)
Example #5
def process_pdf(parser, pdfurl, errors):
    # Note: the errors argument is immediately shadowed by a fresh local list,
    # so errors collected here never reach the caller.
    errors = []
    postlistelib.exit_if_no_cpu_left(0, callback=no_cpu_left, arg=errors)
    try:
        # Fetch the PDF via lazycache instead of scraperwiki.scrape().
        pdfcontent = lazycache.lazycache(pdfurl)
        parser.preprocess(pdfurl, pdfcontent)
#    except ValueError as e:
#        errors.append(e)
    except IndexError as e:
        errors.append(e)
Example #6
def process_pdf(parser, pdfurl, errors):
    # Note: the errors argument is immediately shadowed by a fresh local list,
    # so errors collected here never reach the caller.
    errors = []
    postlistelib.exit_if_no_cpu_left(0, callback=no_cpu_left, arg=errors)
    try:
        # Fetch the PDF via lazycache instead of scraperwiki.scrape().
        pdfcontent = lazycache.lazycache(pdfurl)
        parser.preprocess(pdfurl, pdfcontent)
#    except ValueError as e:
#        errors.append(e)
    except IndexError as e:
        errors.append(e)
Example #7
def process_pdf(parser, pdfurl, errors):
    # Skip PDFs that this parser has already scraped.
    if parser.is_already_scraped(pdfurl):
        return
    postlistelib.exit_if_no_cpu_left(0, out_of_cpu, errors)
    try:
        pdfcontent = scraperwiki.scrape(pdfurl)
        parser.preprocess(pdfurl, pdfcontent)
        pdfcontent = None
    except ValueError as e:
        print(e)
        errors.append(e)
Example #8
def process_pdf(parser, pdfurl, errors):
    # Skip PDFs that this parser has already scraped.
    if parser.is_already_scraped(pdfurl):
        return
    postlistelib.exit_if_no_cpu_left(0, out_of_cpu, errors)
    try:
        pdfcontent = scraperwiki.scrape(pdfurl)
        parser.preprocess(pdfurl, pdfcontent)
        pdfcontent = None
    except ValueError as e:
        print(e)
        errors.append(e)
Example #9
def fetch_and_preprocess(parser, pdfurl):
    # Download the PDF with the postlistelib helper, hand it to the parser,
    # then drop the content to free memory.
    pdfcontent = postlistelib.fetch_url_harder(pdfurl)
    parser.preprocess(pdfurl, pdfcontent)
    pdfcontent = None
Example #10
def fetch_and_preprocess(parser, pdfurl):
    # Download the PDF with the postlistelib helper, hand it to the parser,
    # then drop the content to free memory.
    pdfcontent = postlistelib.fetch_url_harder(pdfurl)
    parser.preprocess(pdfurl, pdfcontent)
    pdfcontent = None
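For reference, a minimal sketch of how a caller might drive process_pdf over a list of PDF URLs, collecting failures instead of stopping at the first bad file. The scrape_all name, the pdfurls list and the final error report are assumptions for illustration, not taken from the examples above.
def scrape_all(parser, pdfurls):
    # Hypothetical driver: gather errors across all PDFs in one list.
    errors = []
    for pdfurl in pdfurls:
        process_pdf(parser, pdfurl, errors)
    # Report everything that failed at the end of the run.
    for e in errors:
        print("Failed to process PDF:", e)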