Ejemplo n.º 1
0
    def _fetch(self):
        """Fetch the tweet page and extract the direct image URL.

        Uses the ``photo_nr`` capture group of ``self._regm`` (1-based in
        the URL, defaulting to the first photo) to pick one of the tweet's
        images and appends its ``data-image-url`` to ``self.src``.
        """
        # NOTE(review): legacy IE user agent — presumably required so the
        # server returns markup containing raw image URLs; confirm.
        response = make_request(
            self.url,
            "GET",
            useragent=
            "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; rv:11.0)"
        )
        with response as file:
            text = decode_all(file)

        html = BeautifulSoup(text, features="html.parser")

        # "photo_nr" is 1-based in the URL; fall back to the first photo.
        index = int(self._regm.group("photo_nr") or 1) - 1
        data = html.select("div[tabindex='0']")[0].select(
            "div[data-image-url]")[index]
        self.src.append(data["data-image-url"])
Ejemplo n.º 2
0
    def fetch(self, update_sources=True, probe=False, ratelimit_retry=False):
        """Fetch this source, translating network failures into a status code.

        Parameters:
            update_sources: when True, run ``self._fetch`` to populate sources.
            probe: when True, issue a HEAD request first to probe the URL.
            ratelimit_retry: forwarded to ``make_request`` for the probe.
        """
        try:
            if update_sources and probe:
                make_request(self.url, 'HEAD', ratelimit_retry)
            if update_sources:
                self._fetch()
        # HTTPError subclasses URLError, so it must be caught first.
        except urllib.error.HTTPError as e:
            self.status = e.code
        except urllib.error.URLError:
            self.status = 400
        except ssl.CertificateError:
            self.status = 495
        except Exception as e:
            # Keep the exception for later inspection; report a generic
            # client-error status.
            self.exception = e
            self.status = 400

        # BUG FIX: the original used ``==`` (a no-op comparison) instead of
        # ``=``, so "Other" sources never had their status forced to 200.
        if type(self) == Other: self.status = 200
Ejemplo n.º 3
0
    def _fetch(self):
        """Resolve the e-shuushuu thumbnail link to a full image URL."""
        with make_request(self.url, "GET") as file:
            page = decode_all(file)

        soup = BeautifulSoup(page, features="html.parser")
        thumb = soup.select("a.thumb_image")[0]
        self.src.append("http://e-shuushuu.net" + thumb["href"])
Ejemplo n.º 4
0
    def _fetch(self):
        """Fetch the zerochan full-size page and record the image source."""
        page_url = "https://www.zerochan.net/full/" + self._regm.group("id")
        response = make_request(page_url, "GET")  # TODO Can't access nsfw pictures
        with response as file:
            markup = decode_all(file)

        soup = BeautifulSoup(markup, features="html.parser")
        image = soup.select('img[alt*="Tags"]')[0]
        self.src.append(image["src"])
Ejemplo n.º 5
0
    def _fetch(self):
        """Query the safebooru API for this post; record file URL, source and rating."""
        url = ("https://safebooru.org/index.php?page=dapi&s=post&q=index&limit=1&id="
               + self._regm.group("id"))
        with make_request(url, "GET") as file:
            text = decode_all(file)

        # The API returns XML; the post node is the first child of the root.
        post = minidom.parseString(text).childNodes[0].childNodes[0]

        self.src.append("http:" + post.getAttribute("file_url"))

        source = post.getAttribute("source")
        if source and validators.url(source):
            self.src.append(source)

        rating = post.getAttribute("rating")
        if rating:
            self.meta["rating"] = rating
Ejemplo n.º 6
0
    def _fetch(self):
        """Fetch the danbooru post JSON and populate metadata and source URLs."""
        request_url = "https://danbooru.donmai.us/posts/" + self._regm["id"] + ".json"
        with make_request(request_url, "GET") as file:
            data = json.loads(decode_all(file))

        if "tag_string_artist" in data:
            self.meta["author"] = data["tag_string_artist"]
        if "tag_string_character" in data:
            characters = data["tag_string_character"].strip()
            if characters:
                self.meta["character"] = characters.split(" ")

        self.meta["rating"] = data["rating"]
        self.meta["uid"] = data["md5"]

        self.src.append(data["file_url"])
        if "source" in data:
            dsrc = data["source"]
            if validators.url(dsrc):
                self.src.append(dsrc)
Ejemplo n.º 7
0
    def download_img(source):
        """Download the image referenced by *source* into ``ARGS.out_folder``.

        The body is streamed to a ``.tmp`` file first and renamed to
        ``<uid><ext>`` only after a complete download; an ``.xmp`` sidecar
        is then (re)written with the metadata carried in *source*.

        Expected keys in *source*: ``img`` (image URL), ``meta`` (dict with
        at least ``uid``), optional ``src`` and ``via`` lists.
        """
        img = source.get("img", None)
        meta = source.get("meta", {})
        uid = meta.get("uid", None)
        if not img:
            log("No image supplied for\n" + str(source))
            return
        if not uid:
            log("No uid for\n" + str(source))
            # BUG FIX: the original fell through here, so UUID(None) below
            # raised and the failure surfaced as a generic exception instead
            # of this clear diagnostic. Bail out early.
            return

        try:
            url = urlparse(img)
            filename = os.path.basename(url.path)
            _, ext = os.path.splitext(filename)
            tmpfile = Path(ARGS.out_folder) / (filename + ".tmp")
            imgfile = Path(ARGS.out_folder) / (str(UUID(uid)) + ext)

            response = make_request(img, "GET")
            header = response.info()
            cnt_type = header["Content-Type"]
            if cnt_type not in ("image/jpeg", "image/png"):
                log("Unknown content type", cnt_type, "for", img)
                return
            size = int(header["Content-Length"])
            size_mb = size / 1_000_000
            if size_mb > ARGS.max_filesize:
                log("%s is too big! You specified a maximum size of %d MB, file is %.2f MB"
                    % (img, ARGS.max_filesize, size_mb))
                return

            # Stream in fixed-size chunks so the progress bar can track them.
            total_chunks = math.ceil(size / config.download_chunk_size)
            with response as stream:
                with open(tmpfile, "wb") as outf:
                    log("Starting download of", img)
                    for _ in atpbar.atpbar(range(total_chunks), name=img):
                        chunk = stream.read(config.download_chunk_size)
                        if not chunk:
                            break
                        outf.write(chunk)

            # Only a fully downloaded file is moved to its final name.
            os.rename(tmpfile, imgfile)

            xmpfile = imgfile.with_suffix(".xmp")
            cute_meta = CuteMeta.from_file(xmpfile)
            cute_meta.clear()  # Delete all unwanted tags

            log("Generating image hash for file", imgfile)
            cute_meta.hash = hash_img(imgfile)
            log("Hashed", imgfile, "as", cute_meta.hash, "(phash, 16)")

            cute_meta.read_from_dict(meta, ignore_missing_keys=True)
            cute_meta.add_characters(*meta.get("character", []))
            cute_meta.source = img
            cute_meta.source_other = source.get("src", [])
            cute_meta.source_via = source.get("via", [])
            # NOTE(review): naive UTC timestamp — confirm consumers expect
            # no tzinfo before switching to datetime.now(timezone.utc).
            cute_meta.date = datetime.utcnow()
            cute_meta.write()

        # HTTPError subclasses URLError, so it must be caught first.
        except urllib.error.HTTPError as e:
            status = e.code
        except urllib.error.URLError:
            status = 400
        except ssl.CertificateError:
            status = 495
        except Exception as e:
            log("An exception occured while fetching url %s: %s" %
                (img, str(e)))
            status = 0
        else:
            status = 200

        if status and status != 200:
            log("%s: %s" % (status, img))