Ejemplo n.º 1
0
def test_save_image_location():
    """Check the paths returned by ``Comic.save_image_location``.

    Each case pairs the positional arguments (image URL, page number,
    target directory) with the path the call is expected to return.
    """
    cases = (
        # The image extension from the URL is kept on the saved file.
        (
            ("http://imgs.xkcd.com/comics/barrel_cropped_(1).jpg", 1, "foo"),
            "foo/1.jpg",
        ),
        # An empty URL yields a path without any extension.
        (("", 1, "bar"), "bar/1"),
        # A query string after the file name does not end up in the path.
        (
            (
                "http://imgs.xkcd.com/comics/barrel_cropped_(1).jpg?q=123",
                1,
                "foo",
            ),
            "foo/1.jpg",
        ),
    )
    for arguments, expected_path in cases:
        assert Comic.save_image_location(*arguments) == expected_path
Ejemplo n.º 2
0
 def image_in_zipfile(item, directory):
     """Tell whether the item's image is already stored in the ``.cbz`` archive.

     The archive path is ``directory`` with ``.cbz`` appended. The member
     name looked up is whatever ``Comic.save_image_location`` returns for
     the item's ``url`` and ``page`` values.

     Returns ``False`` when the archive file does not exist; otherwise
     ``True`` only if the member name is listed in the archive.
     """
     archive_path = "{}.cbz".format(directory)
     if os.path.isfile(archive_path):
         member_name = Comic.save_image_location(
             item.get("url"), item.get("page"))
         with ZipFile(archive_path, "r") as archive:
             return member_name in archive.namelist()
     # No archive on disk, so the image cannot be inside one.
     return False
Ejemplo n.º 3
0
 def get_media_requests(self, item, info):
     """Yield the download request for an item's image unless it is stored.

     The item is read by key for ``url``, ``page``, ``title`` and
     ``alt_text``; ``info.spider.directory`` is the output directory.

     Steps, in order:
       1. Announce the image being saved.
       2. Drop the item (``DropItem``) if the target image file already
          exists or ``image_in_zipfile`` reports it is in the archive.
       3. When ``alt_text`` is not ``None``, write it to the file named by
          ``Comic.save_alt_text_location``.
       4. Yield a ``scrapy.Request`` for the image URL carrying the target
          file name in ``meta["image_file_name"]``.
     """
     click.echo("Saving image {}".format(item.get("url")))

     read_fields = itemgetter("url", "page", "title", "alt_text")
     url, page, title, alt_text = read_fields(item)
     destination = Comic.save_image_location(
         url, page, info.spider.directory, title)

     # Check the loose file first; the archive is consulted only if needed.
     already_on_disk = os.path.isfile(destination)
     if already_on_disk or self.image_in_zipfile(item,
                                                 info.spider.directory):
         skip_message = "The image was already downloaded. Skipping..."
         click.echo(skip_message)
         raise DropItem(skip_message)

     if alt_text is not None:
         alt_text_path = Comic.save_alt_text_location(
             page, info.spider.directory)
         with open(alt_text_path, "w") as alt_text_file:
             alt_text_file.write(alt_text)

     request_url = item.get("url")
     file_name = Comic.save_image_filename(
         item.get("url"), item.get("page"), title, info.spider.directory)
     yield scrapy.Request(request_url, meta={"image_file_name": file_name})