def __call__(self, url=None, filename=None, ext=None, formatter_kwargs=None, compressed=False):
    """Build the output filename for a resource.

    The base name comes from ``filename`` (split with ``splitext``) when it
    is given, otherwise from the ``md5`` helper applied to ``url``. The
    result is then optionally rendered through ``self.template``, passed
    through ``self.folder_strategy`` and suffixed with ``.gz``.

    Args:
        url: Address of the resource. Used to derive the base name when
            ``filename`` is None, and forwarded to ``self.folder_strategy``.
        filename: Optional name to start from. Its extension, when it has
            one, takes precedence over ``ext``.
        ext: Fallback extension used when ``filename`` is None or has no
            extension. None is treated as the empty string.
        formatter_kwargs: Optional mapping of extra keyword arguments
            forwarded to ``self.formatter.format``. None means no extras.
        compressed: When true, ``'.gz'`` is appended to the final name.

    Returns:
        The resulting filename.

    Raises:
        FilenameFormattingError: If rendering ``self.template`` raises any
            exception; the original exception is passed as ``reason``.
    """
    # A None sentinel replaces the former shared `{}` default so that no
    # dict instance is ever reused across calls.
    if formatter_kwargs is None:
        formatter_kwargs = {}

    original_ext = None

    if filename is None:
        base = md5(url)
    else:
        base, original_ext = splitext(filename)

    # We favor the extension found in given filename, else we fallback
    # on the provided one if any (usually inferred from http response)
    ext = original_ext if original_ext else (ext or '')

    if self.template is not None:
        try:
            filename = self.formatter.format(
                self.template,
                value=base,
                ext=ext,
                **formatter_kwargs
            )
        except Exception as e:
            raise FilenameFormattingError(reason=e, template=self.template)
    else:
        filename = base + ext

    if self.folder_strategy:
        filename = self.folder_strategy(filename, url=url)

    if compressed:
        filename += '.gz'

    return filename
def format_page_filename(webentity, page):
    """Return the storage path of a page belonging to a webentity.

    The path has the shape ``<webentity id>/<prefix>/<hash>.html.gz`` where
    ``<hash>`` is the result of the ``md5`` helper applied to
    ``page['url']`` and ``<prefix>`` is its first two items (``hash[:2]``).

    Args:
        webentity: Mapping providing an ``'id'`` key.
        page: Mapping providing a ``'url'`` key.

    Returns:
        The formatted path string.
    """
    digest = md5(page['url'])
    entity_id = webentity['id']
    prefix = digest[:2]

    # TODO: could be something other than html?
    return '%s/%s/%s.html.gz' % (entity_id, prefix, digest)