Esempio n. 1
0
	def download_chapter(self, chapter, download_directory, download_name):
		"""Download every page image of a chapter and hand them to the archiver.

		The chapter page is scanned for the first ``<script>`` that contains
		``var pages``; each ``"image":"..."`` entry found in it is treated as
		a path relative to ``http://dynasty-scans.com/``. Pages are saved as
		``<download_directory>/<6-digit page number>.<extension>`` and the
		resulting file list is passed to ``self.zip_files``.

		Args:
			chapter: Mapping with a ``"url"`` entry (address of the chapter
				page) and a numeric ``"chapter"`` entry (only used in
				warning messages).
			download_directory: Directory the page images are written to.
			download_name: File name of the archive, relative to
				``download_directory``.

		Returns:
			A list of warning strings, one for each page whose download
			failed with an HTTP error. Such pages are skipped.
		"""
		files = []
		warnings = []
		# NOTE(review): gzip is advertised here but the response body is
		# written to disk as received -- confirm the server never compresses
		# image responses.
		headers = {'User-agent': 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36', 'Accept-encoding': 'gzip'}

		logging.debug('Downloading chapter {}.'.format(chapter["url"]))
		page = BeautifulSoup(self.open_url(chapter["url"]))
		scripts = page.find_all("script")
		for script in scripts:
			if re.search(r'var pages', script.text):
				matches = re.findall(r'"image":"(.*?)"', script.text)
				image_count = len(matches)
				for image_name, match in enumerate(matches, start=1):
					print_info("Download: Page {0:04d} / {1:04d}".format(image_name, image_count))
					image_url = 'http://dynasty-scans.com/' + match
					# Everything after the last dot of the URL is the extension.
					file_extension = re.search(r'.*\.([A-Za-z]*)', image_url).group(1)
					req = urllib.request.Request(image_url, headers=headers)

					try:
						response = urllib.request.urlopen(req)
					except urllib.error.HTTPError as e:
						print_info('WARNING: Unable to download file ({}).'.format(str(e)))
						warnings.append('Download of page {}, chapter {:g}, series "{}" failed.'.format(image_name, chapter["chapter"], self.series_info('title')))
						continue

					filename = '{}/{:06d}.{}'.format(download_directory, image_name, file_extension)
					# Context managers release the connection and the file
					# handle even if reading or writing fails part-way.
					with response, open(filename, 'wb') as f:
						f.write(response.read())
					files.append(filename)
				# Only the first script that defines the page list is used.
				break

		filename = download_directory + '/' + download_name
		self.zip_files(files, filename)

		return warnings
Esempio n. 2
0
	def download_chapter(self, chapter, download_directory, download_name):
		"""Download every page image of a chapter and hand them to the archiver.

		The chapter page is fetched (non-ASCII characters are dropped from
		the URL first) and the ``<script>`` tags inside the element with id
		``containerRoot`` are searched for the first one mentioning
		``lstImages``. Every ``lstImages.push("...");`` call in it supplies
		one absolute image URL. Pages are saved as
		``<download_directory>/<6-digit page number>.<extension>`` and the
		resulting file list is passed to ``self.zip_files``.

		Args:
			chapter: Mapping with a ``"url"`` entry (address of the chapter
				page) and a numeric ``"chapter"`` entry (only used in
				warning messages).
			download_directory: Directory the page images are written to.
			download_name: File name of the archive, relative to
				``download_directory``.

		Returns:
			A list of warning strings, one for each page whose download
			failed with an HTTP error. Such pages are skipped.
		"""
		files = []
		warnings = []
		headers = {'User-agent': 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36', 'Accept-encoding': 'gzip'}

		logging.debug('Downloading chapter {}.'.format(chapter["url"]))
		page = BeautifulSoup(self.open_url(chapter["url"].encode('ascii', 'ignore').decode('utf-8')))
		scripts = page.find("div", {"id": "containerRoot"}).find_all('script')
		for script in scripts:
			if re.search(r'lstImages', script.text):
				# Non-greedy capture: with a greedy ``.*`` several push()
				# calls on the same line would be merged into one bogus URL.
				image_urls = re.findall(r'lstImages\.push\("(.*?)"\);', script.text)
				image_count = len(image_urls)
				for image_name, image_url in enumerate(image_urls, start=1):
					print_info("Download: Page {0:04d} / {1:04d}".format(image_name, image_count))
					# Everything after the last dot of the URL is the extension.
					file_extension = re.search(r'.*\.([A-Za-z]*)', image_url).group(1)
					req = urllib.request.Request(image_url, headers=headers)

					try:
						response = urllib.request.urlopen(req)
					except urllib.error.HTTPError as e:
						print_info('WARNING: Unable to download file ({}).'.format(str(e)))
						warnings.append('Download of page {}, chapter {:g}, series "{}" failed.'.format(image_name, chapter["chapter"], self.series_info('title')))
						continue

					filename = '{}/{:06d}.{}'.format(download_directory, image_name, file_extension)
					# Context managers release the connection and the file
					# handle even if reading or writing fails part-way.
					with response, open(filename, 'wb') as f:
						f.write(response.read())
					files.append(filename)
				# Only the first script that fills lstImages is used.
				break

		filename = download_directory + '/' + download_name
		self.zip_files(files, filename)
		return warnings
Esempio n. 3
0
	def __init__(self, url, server=None):
		"""Set up the scraper from a series or chapter URL.

		Args:
			url: Address to start from. A ``bato.to/comic/`` URL is loaded
				directly as the series page; for a ``bato.to/read/`` URL the
				page at ``self.chapter_series(url)`` is loaded instead. Any
				other URL leaves ``self.page`` as ``None``.
			server: Optional image server name, one of ``img1`` to ``img4``.
				Any other value is reported via ``print_info`` and ignored.
		"""
		self.url = url

		# Anything outside the known server names is rejected and treated
		# as "no server selected".
		known_servers = ('img1', 'img2', 'img3', 'img4')
		if server is not None and server not in known_servers:
			print_info('Invalid server selection.')
			server = None
		self.server = server

		if re.match(r'.*bato\.to/comic/.*', url):
			self.page = BeautifulSoup(self.open_url(url))
			self.init_with_chapter = False
			logging.debug('Object initialized with series')
		elif re.match(r'.*bato\.to/read/.*', url):
			try:
				series_url = self.chapter_series(url)
				soup = BeautifulSoup(self.open_url(series_url))
			except IndexError:
				# Reported to the user instead of aborting; the object is
				# still created, just without a page.
				print_info("ERROR: Unable to scrape chapter '{}'. If this is a new release, please try again later (Batoto bug).".format(self.url))
				soup = None
			self.page = soup
			self.init_with_chapter = True
			logging.debug('Object initialized with chapter')
		else:
			self.page = None
			self.init_with_chapter = False
			logging.debug('Empty object initialized')

		logging.debug('Object created with ' + url)
Esempio n. 4
0
    def download_chapter(self, chapter, download_directory, download_name):
        """Download every page image of a chapter and hand them to the archiver.

        The chapter page is fetched (non-ASCII characters are dropped from
        the URL first) and its ``<script>`` tags are searched for the first
        one that assigns ``slides_page_path`` or ``slides_page_url_path``.
        The quoted, comma-separated entries of that array are the image
        URLs; a ``slides_page_path`` list is additionally sorted with
        ``cmp_items`` before downloading. Pages are saved as
        ``<download_directory>/<chapter name>-<6-digit number>.<extension>``
        and the resulting file list is passed to ``self.zip_files``.

        Args:
            chapter: Mapping with a ``"url"`` entry (address of the chapter
                page) and a numeric ``"chapter"`` entry (only used in
                warning messages).
            download_directory: Directory the page images are written to.
            download_name: File name of the archive, relative to
                ``download_directory``.

        Returns:
            A list of warning strings, one for each page whose download
            failed with an HTTP error. Such pages are skipped.
        """
        files = []
        warnings = []
        logging.debug('\n************************************************')
        logging.debug('Downloading chapter {}.'.format(chapter["url"]))
        page = BeautifulSoup(self.open_url(chapter["url"].encode('ascii', 'ignore').decode('utf-8')), "html.parser")
        scripts = page.find_all('script')
        # The last path segment of the chapter URL prefixes every image file.
        chapter_name = chapter["url"].strip('/').split('/')[-1]
        # Counts saved pages only, so file numbers stay contiguous even when
        # individual downloads fail.
        image_name = 1
        for script in scripts:
            # Run each pattern once and reuse the match object.
            found = re.search(r'(var slides_page_path = \[")(.+)("\];)', script.text)
            if found:
                needs_sort = True
            else:
                found = re.search(r'(var slides_page_url_path = \[")(.+)("\];)', script.text)
                if not found:
                    continue
                needs_sort = False

            image_urls = found.group(2).split('","')
            if needs_sort:
                image_urls = sorted(image_urls, key=cmp_to_key(cmp_items))

            for image_url in image_urls:
                if image_url == '':
                    continue

                # Everything after the last dot of the URL is the extension.
                file_extension = re.search(r'.*\.([A-Za-z]*)', image_url).group(1)
                logging.debug('Downloading image ' + image_url)
                req = urllib.request.Request(image_url, headers={
                    'User-agent': self.default_user_agent(),
                    'Accept-encoding': 'gzip'})
                try:
                    response = urllib.request.urlopen(req)
                except urllib.error.HTTPError as e:
                    print_info('WARNING: Unable to download file ({}).'.format(str(e)))
                    warnings.append(
                        'Download of page {}, chapter {:g}, series "{}" failed.'.format(
                            image_name, chapter["chapter"], self.series_info('title')))
                    continue
                filename = '{}/{}-{:06d}.{}'.format(download_directory, chapter_name, image_name, file_extension)
                # Context managers release the connection and the file handle
                # even if reading or writing fails part-way.
                with response, open(filename, 'wb') as f:
                    f.write(response.read())
                logging.debug('Saved image ' + filename)
                files.append(filename)
                image_name += 1
            # Only the first script that defines a slide list is used.
            break

        filename = download_directory + '/' + download_name
        self.zip_files(files, filename)
        logging.debug('Finished {} Chapter'.format(chapter_name))

        return warnings
Esempio n. 5
0
    def download_chapter(self, chapter, download_directory, download_name):
        """Download every page image of a chapter and hand them to the archiver.

        The chapter page is scanned for the first ``<script>`` that contains
        ``var pages``; each ``"image":"..."`` entry found in it is treated as
        a path relative to ``http://dynasty-scans.com/``. Pages are saved as
        ``<download_directory>/<6-digit page number>.<extension>`` and the
        resulting file list is passed to ``self.zip_files``.

        Args:
            chapter: Mapping with a ``"url"`` entry (address of the chapter
                page) and a numeric ``"chapter"`` entry (only used in
                warning messages).
            download_directory: Directory the page images are written to.
            download_name: File name of the archive, relative to
                ``download_directory``.

        Returns:
            A list of warning strings, one for each page whose download
            failed with an HTTP error. Such pages are skipped.
        """
        files = []
        warnings = []

        logging.debug('Downloading chapter {}.'.format(chapter["url"]))
        page = BeautifulSoup(self.open_url(chapter["url"]), "html.parser")
        scripts = page.find_all("script")
        for script in scripts:
            if re.search(r'var pages', script.text):
                matches = re.findall(r'"image":"(.*?)"', script.text)
                image_count = len(matches)
                for image_name, match in enumerate(matches, start=1):
                    print_info("Download: Page {0:04d} / {1:04d}".format(image_name, image_count))
                    image_url = 'http://dynasty-scans.com/' + match
                    # Everything after the last dot of the URL is the extension.
                    file_extension = re.search(r'.*\.([A-Za-z]*)', image_url).group(1)
                    req = urllib.request.Request(image_url, headers={
                        'User-agent': self.default_user_agent(),
                        'Accept-encoding': 'gzip'})

                    try:
                        response = urllib.request.urlopen(req)
                    except urllib.error.HTTPError as e:
                        print_info('WARNING: Unable to download file ({}).'.format(str(e)))
                        warnings.append(
                            'Download of page {}, chapter {:g}, series "{}" failed.'.format(
                                image_name, chapter["chapter"], self.series_info('title')))
                        continue

                    filename = '{}/{:06d}.{}'.format(download_directory, image_name, file_extension)
                    # Context managers release the connection and the file
                    # handle even if reading or writing fails part-way.
                    with response, open(filename, 'wb') as f:
                        f.write(response.read())
                    files.append(filename)
                # Only the first script that defines the page list is used.
                break

        filename = download_directory + '/' + download_name
        self.zip_files(files, filename)

        return warnings
Esempio n. 6
0
	def zip_files(files, filename):
		"""Pack the given files into a zip archive and delete the originals.

		Each file is stored under its base name, so the archive has no
		directory structure.

		Args:
			files: Paths of the files to archive.
			filename: Path of the zip archive to create (overwritten if it
				already exists).
		"""
		# The context manager guarantees the archive is finalised and its
		# handle closed, even if adding a file fails.
		with zipfile.ZipFile(filename, mode="w") as zipf:
			for f in files:
				zipf.write(f, os.path.basename(f))

		# Delete the sources only once the archive is safely on disk, so a
		# failure while zipping never loses already-downloaded files.
		for f in files:
			os.remove(f)

		# expanduser() honours $HOME when it is set but, unlike
		# os.environ['HOME'], does not raise when it is missing.
		print_info("Zip created: " + filename.replace(os.path.expanduser('~'), "~"))
Esempio n. 7
0
	def download_chapter(self, chapter, download_directory, download_name):
		"""Download every page image of a chapter and hand them to the archiver.

		Two page layouts are handled:

		* Per page mode: the chapter page has a ``page_select`` drop-down.
		  Every option value is fetched as its own page and the image inside
		  the ``full_image`` div is downloaded. When ``self.server`` is set,
		  the image host is replaced by ``<server>.bato.to``.
		* Long strip mode: used when per page mode raises ``AttributeError``
		  (e.g. no ``page_select`` element). The chapter page is fetched
		  again and every ``<img>`` whose source matches the bato.to reader
		  path pattern is downloaded.

		Pages are saved as
		``<download_directory>/<6-digit page number>.<extension>`` and the
		resulting file list is passed to ``self.zip_files``.

		Args:
			chapter: Mapping with a ``"url"`` entry (address of the chapter
				page) and a numeric ``"chapter"`` entry (only used in
				warning messages).
			download_directory: Directory the page images are written to.
			download_name: File name of the archive, relative to
				``download_directory``.

		Returns:
			A list of warning strings, one for each page whose download
			failed with an HTTP error. Such pages are skipped.
		"""
		chapter_url = chapter["url"]
		logging.debug('Downloading chapter {}.'.format(chapter_url))
		# Keep the parsed page under its own name: the ``chapter`` mapping is
		# still needed for the chapter number in warning messages.
		chapter_page = BeautifulSoup(self.open_url(chapter_url))
		files = []
		warnings = []
		headers = {'User-agent': 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36', 'Accept-encoding': 'gzip'}

		# NOTE(review): an AttributeError raised anywhere in per page mode
		# (not only by a missing page selector) also triggers the long strip
		# fallback below.
		try:
			page_urls = chapter_page.find("select", {"name": "page_select"}).find_all("option")
			pages = [page["value"] for page in page_urls]
			logging.debug('Per page mode')
			image_count = len(pages)

			for image_name, page_url in enumerate(pages, start=1):
				print_info("Download: Page {0:04d} / {1:04d}".format(image_name, image_count))
				page = BeautifulSoup(self.open_url(page_url))
				url = page.find("div", {"id": "full_image"}).find("img")["src"]
				if self.server is not None:
					# Keep the path, swap the host for the selected server.
					url = 'http://{}.bato.to{}'.format(self.server, re.search(r'.*\.bato\.to(.*)', url).group(1))
				# Everything after the last dot of the URL is the extension.
				file_extension = re.search(r'.*\.([A-Za-z]*)', url).group(1)

				req = urllib.request.Request(url, headers=headers)
				try:
					logging.debug('Downloading img {}'.format(url))
					response = urllib.request.urlopen(req)
				except urllib.error.HTTPError as e:
					print_info('WARNING: Unable to download file ({}).'.format(str(e)))
					warnings.append('Download of page {}, chapter {:g}, series {} failed.'.format(image_name, chapter["chapter"], self.series_info('title')))
					# Skip this page; without this a stale or undefined
					# response would be written to disk.
					continue

				filename = '{}/{:06d}.{}'.format(download_directory, image_name, file_extension)
				# Context managers release the connection and the file
				# handle even if reading or writing fails part-way.
				with response, open(filename, 'wb') as f:
					f.write(response.read())
				files.append(filename)
		except AttributeError:
			logging.debug('Long strip mode')
			page = BeautifulSoup(self.open_url(chapter_url))
			images = page.find_all('img', src=re.compile(r"img[0-9]*\.bato\.to/comics/.*/.*/.*/.*/read.*/"))
			image_count = len(images)

			for image_name, image in enumerate(images, start=1):
				print_info("Download: Page {0:04d} / {1:04d}".format(image_name, image_count))
				url = image['src']
				file_extension = re.search(r'.*\.([A-Za-z]*)', url).group(1)
				req = urllib.request.Request(url, headers=headers)

				try:
					response = urllib.request.urlopen(req)
				except urllib.error.HTTPError as e:
					print_info('WARNING: Unable to download file ({}).'.format(str(e)))
					warnings.append('Download of page {}, chapter {:g}, series "{}" failed.'.format(image_name, chapter["chapter"], self.series_info('title')))
					continue

				filename = '{}/{:06d}.{}'.format(download_directory, image_name, file_extension)
				with response, open(filename, 'wb') as f:
					f.write(response.read())
				files.append(filename)

		filename = download_directory + '/' + download_name
		self.zip_files(files, filename)

		return warnings