def postprocess(self, workdir, db):
    '''
    Run every registered postprocessor against ``db``.

    ``self.postprocessors`` is iterated as ``(url, processor)`` pairs and
    each processor is called as ``processor(db, filepath)``.

    When the pair has a truthy URL, ``filepath`` is ``workdir`` joined with
    the first element of the pair returned by
    ``extract_meta_from_headers(url)``. Otherwise ``filepath`` is ``None``.
    '''
    for source, handler in self.postprocessors:
        # Processors without an associated URL receive no file at all.
        if not source:
            handler(db, None)
            continue
        name, _unused = extract_meta_from_headers(source)
        handler(db, join(workdir, name))
def postprocess(self, workdir, db, only=None):
    '''
    Perform postprocessing.

    ``self.postprocessors`` is iterated as ``(url, processor)`` pairs and
    each selected processor is called as ``processor(db, filepath)``.

    :param workdir: directory joined with the file name to build the
        ``filepath`` handed to processors that have a URL.
    :param db: passed through unchanged as first argument of each processor.
    :param only: optional processor name. When given, only processors whose
        ``__name__`` equals it are run. Callables without a ``__name__``
        (callable instances, ``functools.partial`` objects) can not match
        and are skipped instead of raising ``AttributeError``.
    '''
    for url, processor in self.postprocessors:
        # Filter first so that skipped processors never trigger the
        # metadata lookup for their URL.
        if only is not None and getattr(processor, '__name__', None) != only:
            continue
        filepath = None
        if url:
            # Only the first element (used as file name) is needed here.
            filename, _ = extract_meta_from_headers(url)
            filepath = join(workdir, filename)
        processor(db, filepath)
def download(ctx):
    '''
    Download sources datasets.

    Collects the distinct URLs declared by the levels in
    ``ctx.obj['levels']`` (levels with a falsy ``urls`` are ignored) and
    retrieves each one into ``DL_DIR``, creating the directory if needed.
    The target file name and the progress bar length both come from
    ``extract_meta_from_headers(url)``.
    '''
    title('Downloading required datasets')
    if not exists(DL_DIR):
        os.makedirs(DL_DIR)

    # A set, so a URL shared by several levels is fetched only once.
    urls = {
        url
        for level in ctx.obj['levels'] if level.urls
        for url in level.urls
    }

    for url in urls:
        info('Dealing with {0}'.format(url))
        filename, size = extract_meta_from_headers(url)
        info('Downloading {0}'.format(filename))
        with click.progressbar(length=size) as bar:
            # Bytes already reported to the bar. Kept in a list so the
            # nested hook can rebind it without ``nonlocal``.
            reported = [0]

            def reporthook(blocknum, blocksize, totalsize):
                read = blocknum * blocksize
                if read <= 0:
                    return
                # The last block is usually partial, so blocknum * blocksize
                # overshoots the real size: cap it. Fall back on the size
                # announced by the headers when the total is not reported.
                limit = totalsize if totalsize > 0 else size
                if limit:
                    read = min(read, limit)
                # ``bar.update`` advances by a number of steps: feed it
                # the delta since the last call, not the running total.
                step = read - reported[0]
                if step > 0:
                    bar.update(step)
                    reported[0] = read

            urlretrieve(url, join(DL_DIR, filename), reporthook=reporthook)