Example #1
import os
import urllib2

from scraper import Scraper  # assumption: Scraper lives in a local module


class Downloader:
    def __init__(self, outdir):
        # Scrape up front so the zip links are ready before downloading.
        self.scrpr = Scraper()
        self.scrpr.scrape()
        self.size = 0        # running total of downloaded bytes
        self.outdir = outdir

    def getFileUrls(self):
        # Ask the scraper for the list of zip-file URLs to download.
        self.fileUrls = self.scrpr.getZipLinks()

    def getFileSize(self, url, urlobj):
        # Add this file's Content-Length, as reported by the server, to the
        # running byte total.
        meta = urlobj.info()
        file_size = int(meta.getheaders("Content-Length")[0])
        self.size += file_size

    def openUrl(self, url):
        # Open the remote URL and return the response object for streaming.
        urlobj = urllib2.urlopen(url)
        return urlobj

    def iterateThroughFiles(self):
        # Download every discovered zip file into its per-year subdirectory.
        self.getFileUrls()
        for url in self.fileUrls:
            urlobj = self.openUrl(url)
            year, fileName = self.getFileYearAndName(url)
            dirPath = self.mkDirectory(year)
            self.getFileSize(url, urlobj)
            self.saveFile(fileName, dirPath, urlobj)
        print "Total file size is: %d" % self.size

    def getFileYearAndName(self, url):
        # The URL is assumed to end in .../<year>/<filename>
        return [url.split('/')[-2], url.split('/')[-1]]

    def mkDirectory(self, dir):
        # Create the output subdirectory if it does not already exist.
        dirPath = os.path.join(self.outdir, dir)
        if not os.path.exists(dirPath):
            os.mkdir(dirPath)
        return dirPath

    def saveFile(self, fileName, dirPath, urlobj):
        # Stream the response to disk in 8 KB chunks rather than reading the
        # whole file into memory.
        filePath = os.path.join(dirPath, fileName)
        block_size = 8192
        with open(filePath, 'wb') as fp:
            while True:
                buffer = urlobj.read(block_size)
                if not buffer:
                    break
                fp.write(buffer)
        print "Downloaded: %s" % filePath