Example #1
import os
import urllib2

from scraper import Scraper  # assumption: Scraper lives in a local module


class Downloader:
    def __init__(self, outdir):
        # Scrape up front so the zip links are ready before downloading.
        self.scrpr = Scraper()
        self.scrpr.scrape()
        self.size = 0        # running total of downloaded bytes
        self.outdir = outdir

    def getFileUrls(self):
        # Ask the scraper for the list of zip-file URLs to download.
        self.fileUrls = self.scrpr.getZipLinks()

    def getFileSize(self, url, urlobj):
        # Add this file's Content-Length, as reported by the server, to the
        # running byte total.
        meta = urlobj.info()
        file_size = int(meta.getheaders("Content-Length")[0])
        self.size += file_size

    def openUrl(self, url):
        # Open the remote URL and return the response object for streaming.
        urlobj = urllib2.urlopen(url)
        return urlobj

    def iterateThroughFiles(self):
        # Download every discovered zip file into its per-year subdirectory.
        self.getFileUrls()
        for url in self.fileUrls:
            urlobj = self.openUrl(url)
            year, fileName = self.getFileYearAndName(url)
            dirPath = self.mkDirectory(year)
            self.getFileSize(url, urlobj)
            self.saveFile(fileName, dirPath, urlobj)
        print "Total file size is: %d" % self.size

    def getFileYearAndName(self, url):
        # The URL is assumed to end in .../<year>/<filename>
        return [url.split('/')[-2], url.split('/')[-1]]

    def mkDirectory(self, dir):
        # Create the output subdirectory if it does not already exist.
        dirPath = os.path.join(self.outdir, dir)
        if not os.path.exists(dirPath):
            os.mkdir(dirPath)
        return dirPath

    def saveFile(self, fileName, dirPath, urlobj):
        # Stream the response to disk in 8 KB chunks rather than reading the
        # whole file into memory.
        filePath = os.path.join(dirPath, fileName)
        block_size = 8192
        with open(filePath, 'wb') as fp:
            while True:
                buffer = urlobj.read(block_size)
                if not buffer:
                    break
                fp.write(buffer)
        print "Downloaded: %s" % filePath