Python get_webpage_text Examples

Programming Language: Python

Namespace/Package Name: utils

Method/Function: get_webpage_text

Examples at hotexamples.com: 4

Python get_webpage_text - 4 examples found. These are the top-rated real-world Python examples of utils.get_webpage_text extracted from open-source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: get_html.py Project: JH27/crawlers

def get_page(assembly_id, bill_id, link_id, field):
    """Fetch the page for one bill and save it as an HTML file.

    The URL is ``BASEURL[field]`` followed by ``link_id``; the page is
    written to ``<DIR[field]>/<assembly_id>/<bill_id>.html``.

    The page is refetched while the response contains the
    ``TEXTAREA ID="MSG"`` marker, for at most 10 attempts in total.  If the
    marker is still present after the last attempt, that last response is
    written anyway.

    Raises:
        requests.exceptions.RequestException: if every attempt failed and
            no page was ever retrieved.
    """
    url = '%s%s' % (BASEURL[field], link_id)
    outp = '%s/%s/%s.html' % (DIR[field], assembly_id, bill_id)

    # Presumably this hidden textarea marks a server-side notice/error page
    # rather than real content -- TODO confirm against the target site.
    retry_marker = 'TEXTAREA ID="MSG" STYLE="display:none"'
    max_attempts = 10

    doc = None
    last_error = None
    # Failed requests count toward the limit as well, so a persistent
    # network error cannot keep this loop spinning forever.
    for _ in range(max_attempts):
        try:
            doc = utils.get_webpage_text(url)
        except requests.exceptions.RequestException as e:
            last_error = e
            continue
        if retry_marker not in doc:
            break

    if doc is None:
        # Nothing was ever downloaded: surface the failure instead of
        # writing an empty/undefined document.
        raise last_error

    with open(outp, 'w') as f:
        f.write(doc)

Example #2

Show file

def get_page(assembly_id, bill_id, link_id, field):
    """Download a single bill page and store it on disk.

    Builds the URL from ``BASEURL[field]`` and ``link_id`` and saves the
    response to ``<DIR[field]>/<assembly_id>/<bill_id>.html``.

    A response containing the ``TEXTAREA ID="MSG"`` marker triggers another
    fetch.  At most 10 fetches are attempted, and request failures are
    included in that count.  When the limit is reached with the marker still
    present, the most recent response is saved as-is.

    Raises:
        requests.exceptions.RequestException: the last request error, when
            none of the attempts produced a page.
    """
    url = '%s%s' % (BASEURL[field], link_id)
    outp = '%s/%s/%s.html' % (DIR[field], assembly_id, bill_id)

    doc = None
    failure = None
    attempts = 0
    while attempts < 10:
        # Count the attempt up front so that an exception below still
        # advances the loop; otherwise a dead connection retries forever.
        attempts += 1
        try:
            doc = utils.get_webpage_text(url)
        except requests.exceptions.RequestException as exc:
            failure = exc
            continue
        # NOTE(review): the marker looks like a hidden message box served in
        # place of the real page -- confirm what the site actually means.
        if 'TEXTAREA ID="MSG" STYLE="display:none"' not in doc:
            break

    if doc is None:
        # Every attempt raised; there is nothing to write.
        raise failure

    with open(outp, 'w') as f:
        f.write(doc)

Example #3

Show file

File: html.py Project: JH27/crawlers

    def get_page(baseurl, page, directory, npages):
        """Fetch one listing page and save it as ``<directory>/<pn>.html``.

        ``page`` and ``PAGE_SIZE`` are appended to ``baseurl`` as query
        parameters.  The output file number is ``pn = npages - page + 1``,
        i.e. files are numbered in the reverse order of ``page``.

        The page is refetched for as long as the response contains the
        ``TEXTAREA ID="MSG"`` marker.  On success ``pn`` followed by a tab is
        written to stdout as a progress indicator.

        Request errors and I/O errors are not raised; a failure message is
        printed instead.
        """
        try:
            url = baseurl + '&PAGE=%d&PAGE_SIZE=%d' % (page, PAGE_SIZE)
            pn = npages - page + 1
            fn = '%s/%d.html' % (directory, pn)

            # NOTE(review): this retry has no upper bound; if the server
            # keeps returning the marker the loop never ends.
            is_first = True
            while is_first or 'TEXTAREA ID="MSG" STYLE="display:none"' in doc:
                doc = utils.get_webpage_text(url)
                is_first = False

            with open(fn, 'w') as f:
                f.write(doc)

            sys.stdout.write('%s\t' % pn)
            sys.stdout.flush()

        except (requests.exceptions.RequestException, IOError) as e:
            # repr(e) rather than e.__repr__: the latter is the bound method
            # object, which formats as "<method-wrapper ...>" and hides the
            # actual error.  Single-argument print(...) behaves the same
            # under the Python 2 print statement and the Python 3 function.
            print('\nFailed to get %s due to %s' % (fn, repr(e)))

Example #4

Show file

File: html.py Project: winnersky/crawlers

    def get_page(baseurl, page, directory, npages):
        """Download listing page ``page`` and write it under ``directory``.

        The request URL is ``baseurl`` plus ``&PAGE=<page>&PAGE_SIZE=<PAGE_SIZE>``.
        The file is named ``<pn>.html`` where ``pn = npages - page + 1``.

        While the downloaded text contains the ``TEXTAREA ID="MSG"`` marker
        the page is requested again.  After the file is written, ``pn`` and a
        tab are emitted on stdout and flushed.

        ``requests`` exceptions and ``IOError`` are caught and reported with
        a printed message; they do not propagate to the caller.
        """
        try:
            url = baseurl + '&PAGE=%d&PAGE_SIZE=%d' % (page, PAGE_SIZE)
            pn = npages - page + 1
            fn = '%s/%d.html' % (directory, pn)

            is_first = True
            # NOTE(review): unbounded -- a response that always carries the
            # marker will be requested indefinitely.
            while is_first or 'TEXTAREA ID="MSG" STYLE="display:none"' in doc:
                doc = utils.get_webpage_text(url)
                is_first = False

            with open(fn, 'w') as f:
                f.write(doc)

            sys.stdout.write('%s\t' % pn)
            sys.stdout.flush()

        except (requests.exceptions.RequestException, IOError) as e:
            # Call repr() on the exception; formatting the uncalled
            # e.__repr__ attribute prints the method object, not the error.
            # The parenthesised single-argument print works on Python 2 and 3.
            print('\nFailed to get %s due to %s' % (fn, repr(e)))