Esempio n. 1
0
def read_soup(page):
    '''
    Downloads ``page`` through fetch_url and returns the markup wrapped
    in a bs4.BeautifulSoup object
    '''
    # fetch_url yields three values; only the middle one (the markup) is used
    fetched = fetch_url(page)
    _url, markup, _cookie_jar = fetched
    soup = bs4.BeautifulSoup(markup)
    return soup
Esempio n. 2
0
def read_soup(page):
    '''
    Fetches ``page`` and builds a BeautifulSoup object from the result
    '''
    # The element at index 1 of fetch_url's result is the markup to parse
    response = fetch_url(page)
    markup = response[1]
    return bs4.BeautifulSoup(markup)
Esempio n. 3
0
def save_file(filename, url):
    '''
    Downloads ``url`` and writes the received payload to ``filename``.

    The download is retried, sleeping 2 seconds between tries, whenever
    fetch_url raises urllib2.HTTPError. Once the attempt counter reaches
    MAX_ATTEMPTS a DREError is raised instead of retrying again.

    filename -- path of the file to write; opened in binary write mode
    url      -- address handed to fetch_url

    Raises DREError when the payload could not be fetched.
    '''
    attempt = 1
    while True:
        try:
            # Only the middle value (the payload) is needed; the other two
            # are bound to throwaway names so ``url`` is never rebound.
            _url, data_blob, _cookies = fetch_url(url)
            break
        except urllib2.HTTPError:
            logger.error('Could not read PDF: %s DOC: %s' % (url, filename))
            attempt += 1
            # '>=' instead of '==': with MAX_ATTEMPTS <= 1 an equality test
            # would never match and the loop would retry forever.
            if attempt >= MAX_ATTEMPTS:
                raise DREError('Couldn\'t get the PDF: %s' % url)
            logger.debug('Sleeping 2 secs...')
            time.sleep(2)

    # The with-block closes the file on exit; no explicit close() is needed
    with open(filename, 'wb') as f:
        f.write(data_blob)
Esempio n. 4
0
 def read_bdp_file(self):
     '''
     Fetches BDP_SOURCE_URL and returns a csv reader over the received
     payload, using ';' as the field delimiter
     '''
     # Only the middle value returned by fetch_url (the payload) is used
     _url, payload, _cj = fetch_url(BDP_SOURCE_URL)
     buf = StringIO.StringIO(payload)
     return csv.reader(buf, delimiter=';')
Esempio n. 5
0
def save_file(filename, url):
    '''
    Downloads ``url`` and writes the received payload to ``filename``.

    filename -- path of the file to write; opened in binary write mode
    url      -- address handed to fetch_url
    '''
    # The element at index 1 of fetch_url's result is the payload to store
    data_blob = fetch_url(url)[1]
    # The with-block closes the file on exit; no explicit close() is needed
    with open(filename, 'wb') as f:
        f.write(data_blob)