Example #1
0
    def file_path(self, request, response=None, info=None):
        """Return the path under which the download for *request* is stored.

        *request* is either a ``Request`` or, when called from ``image_key``
        or ``file_key``, a bare URL.

        * If the request carries a truthy ``group`` attribute, the path is
          ``group["name"]`` joined with ``group["urls"][request.url]`` plus
          ``self.DEFAULT_EXT``.
        * Otherwise the path is the URL with its scheme blanked out, with
          ``self.DEFAULT_EXT`` appended when the last path segment has no
          extension.
        * If either of the above raises, the path falls back to
          ``err/<sha1 of the original URL><DEFAULT_EXT>``.

        When the request has a spider with a truthy ``subdir``, that
        directory is prepended to the result.

        ``response`` and ``info`` are accepted but not used here.
        """
        # check if called from image_key or file_key with url as first argument
        raw_url = request.url if isinstance(request, Request) else request

        group = getattr(request, "group", None)
        try:
            if group:
                filename = "{0}{1}".format(group["urls"][request.url], self.DEFAULT_EXT)
                path = os.path.join(group["name"], filename)
            else:
                # Parse into a separate name so the fallback below can still
                # hash the original URL.
                parsed = URL(raw_url)
                parsed.scheme = ""
                _, ext = os.path.splitext(parsed.path.split("/")[-1])
                if not ext:
                    parsed.path = parsed.path.strip("/") + self.DEFAULT_EXT
                path = parsed.geturl()
        except Exception:
            # Deliberate best effort: anything that cannot be mapped to a
            # readable path is stored under "err/" keyed by a hash of its URL.
            # hashlib needs bytes, so text URLs are encoded first.
            digest_input = raw_url
            if not isinstance(digest_input, bytes):
                digest_input = digest_input.encode("utf-8")
            path = os.path.join("err", hashlib.sha1(digest_input).hexdigest() + self.DEFAULT_EXT)

        # A bare URL has no spider attached, so look the subdir up defensively.
        spider = getattr(request, "spider", None)
        subdir = getattr(spider, "subdir", None)
        if subdir:
            path = os.path.join(subdir, path)
        return path
Example #2
0
    def file_path(self, request, response=None, info=None):
        """Compute the path used to store the download for *request*.

        *request* may be a ``Request`` or a plain URL (the latter when the
        method is reached through ``image_key`` or ``file_key``).

        Resolution order:

        1. A truthy ``group`` attribute on the request yields
           ``<group["name"]>/<group["urls"][request.url]><DEFAULT_EXT>``.
        2. Without a group, the URL is used with an empty scheme;
           ``self.DEFAULT_EXT`` is appended if the final path segment has
           no extension.
        3. If step 1 or 2 raises, the result is
           ``err/<sha1 hex digest of the original URL><DEFAULT_EXT>``.

        The result is finally prefixed with the spider's ``subdir`` when the
        request has a spider and that attribute is truthy.

        ``response`` and ``info`` are not used by this implementation.
        """
        # check if called from image_key or file_key with url as first argument
        if isinstance(request, Request):
            source_url = request.url
        else:
            source_url = request

        group = getattr(request, "group", None)
        try:
            if group:
                filename = "{0}{1}".format(group["urls"][request.url],
                                           self.DEFAULT_EXT)
                path = os.path.join(group["name"], filename)
            else:
                # Do not rebind source_url: the except branch must hash the
                # original URL, not the parsed object.
                url_obj = URL(source_url)
                url_obj.scheme = ''
                last_segment = url_obj.path.split('/')[-1]
                _, ext = os.path.splitext(last_segment)
                if not ext:
                    url_obj.path = url_obj.path.strip('/') + self.DEFAULT_EXT
                path = url_obj.geturl()
        except Exception:
            # Best-effort fallback: file the item under "err/" using a hash
            # of its URL. sha1() only accepts bytes, so encode text first.
            if isinstance(source_url, bytes):
                url_bytes = source_url
            else:
                url_bytes = source_url.encode("utf-8")
            path = os.path.join(
                "err",
                hashlib.sha1(url_bytes).hexdigest() + self.DEFAULT_EXT)

        # A plain-URL call has no spider; treat that as "no subdir" rather
        # than failing with AttributeError.
        subdir = getattr(getattr(request, "spider", None), "subdir", None)
        if subdir:
            path = os.path.join(subdir, path)
        return path