Exemple #1
0
    def __call__(self,
                 url=None,
                 filename=None,
                 ext=None,
                 formatter_kwargs=None,
                 compressed=False):
        """Build the filename under which a resource should be stored.

        Args:
            url: Url of the resource. It is hashed with ``md5`` to produce
                the base name when ``filename`` is not given, and it is
                forwarded to the folder strategy when one is set.
            filename: Optional filename. When given, it is split with
                ``splitext`` and its base and extension are reused.
            ext: Fallback extension, only used when ``filename`` does not
                carry one of its own.
            formatter_kwargs: Optional mapping of extra keyword arguments
                forwarded to ``self.formatter.format`` alongside ``value``
                and ``ext``. ``None`` (the default) means no extra arguments.
            compressed: When truthy, ``.gz`` is appended to the result.

        Returns:
            The resulting filename.

        Raises:
            FilenameFormattingError: If formatting ``self.template`` fails
                for any reason; the original exception is attached as both
                ``reason`` and ``__cause__``.
        """
        # A None sentinel avoids sharing one dict instance across every call
        # (the classic mutable-default pitfall) and tolerates explicit None.
        if formatter_kwargs is None:
            formatter_kwargs = {}

        original_ext = None

        if filename is None:
            base = md5(url)
        else:
            base, original_ext = splitext(filename)

        # We favor the extension found in given filename, else we fallback
        # on the provided one if any (usually inferred from http response)
        ext = original_ext if original_ext else (ext or '')

        if self.template is not None:
            try:
                filename = self.formatter.format(self.template,
                                                 value=base,
                                                 ext=ext,
                                                 **formatter_kwargs)
            except Exception as e:
                # Any formatting failure is reported through a single domain
                # error; chaining keeps the original traceback reachable.
                raise FilenameFormattingError(
                    reason=e, template=self.template) from e
        else:
            filename = base + ext

        # The folder strategy, when set, rewrites the filename and also
        # receives the url.
        if self.folder_strategy:
            filename = self.folder_strategy(filename, url=url)

        if compressed:
            filename += '.gz'

        return filename
Exemple #2
0
def format_page_filename(webentity, page):
    """Return the relative ``.html.gz`` path for a page of a web entity.

    The path has the shape ``<id>/<prefix>/<hash>.html.gz`` where ``<id>`` is
    ``webentity['id']``, ``<hash>`` is ``md5`` applied to ``page['url']`` and
    ``<prefix>`` is the first two characters of that hash.
    """
    url_hash = md5(page['url'])
    entity_id = webentity['id']

    # The first two characters of the hash form an intermediate folder level.
    prefix = url_hash[:2]

    # TODO: could be something other than html?
    return '%s/%s/%s.html.gz' % (entity_id, prefix, url_hash)