def crawl_feed(self, feed_url: str = '') -> list[str]:

    urls: list[str] = []

    if not feed_url:
        feed_url = constants.rss_url

    request_dict = construct_request_dict(self.settings, self.own_settings)

    response = request_with_retries(
        feed_url,
        request_dict,
        post=False,
    )

    if not response:
        logger.error("Got no response from feed URL: {}".format(feed_url))
        return urls

    response.encoding = 'utf-8'

    feed = feedparser.parse(response.text)

    for item in feed['items']:
        if any(item['title'].startswith(category) for category in self.own_settings.accepted_rss_categories):
            urls.append(item['link'])
    return urls

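# Note: request_with_retries() is used throughout this section but not defined
# here. A minimal sketch of the assumed behavior (hypothetical reconstruction;
# the real helper may log, back off between attempts, or handle more cases):
import requests

def request_with_retries(url, request_dict, post=False, retries=3):
    # Try the request up to `retries` times, returning None if all attempts fail.
    for attempt in range(retries):
        try:
            if post:
                return requests.post(url, **request_dict)
            return requests.get(url, **request_dict)
        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
            continue
    return None
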
def compare_by_title(self, title: str) -> bool:

    request_dict = construct_request_dict(self.settings, self.own_settings)
    request_dict['params'] = {'q': title}

    response = request_with_retries(
        constants.main_page,
        request_dict,
        post=False,
    )

    if not response:
        return False

    response.encoding = 'utf-8'

    m = re.finditer(r'a href="/view/(\d+)/*"', response.text)

    matches_links = set()

    for match in m:
        matches_links.add("{}{}".format(
            constants.gallery_container_url, match.group(1)))

    self.gallery_links = list(matches_links)

    if len(self.gallery_links) > 0:
        self.found_by = self.name
        return True
    else:
        return False

def compare_by_title_search_page(self, title: str) -> bool:
    # https://www.fakku.net/search/+title
    full_url = urljoin(constants.main_url, 'search/+') + quote(title)

    logger.info("Querying URL: {}".format(full_url))

    request_dict = construct_request_dict(self.settings, self.own_settings)

    response = request_with_retries(
        full_url,
        request_dict,
        post=False,
        retries=3
    )

    if not response:
        logger.info("Got no response from server")
        return False

    response.encoding = 'utf-8'

    soup_1 = BeautifulSoup(response.text, 'html.parser')

    matches_links = set()

    for link_container in soup_1.find_all("div", class_=re.compile("content-meta")):
        title_container = link_container.find("a", class_="content-title")
        matches_links.add(urljoin(constants.main_url, title_container.get('href')))

    self.gallery_links = list(matches_links)

    if len(self.gallery_links) > 0:
        self.found_by = self.name
        return True
    else:
        return False

def compare_by_title(self, title: str) -> bool:

    request_dict = construct_request_dict(self.settings, self.own_settings)
    request_dict['params'] = {'type': 'title', 'search_value': title}

    r = request_with_retries(
        urljoin(constants.main_url, 'Search'),
        request_dict,
    )

    if not r:
        logger.info("Got no response from server")
        return False

    r.encoding = 'utf-8'

    soup_1 = BeautifulSoup(r.text, 'html.parser')

    matches_links = set()

    for gallery in soup_1.find_all("div", class_=re.compile("showcase_comics_product_image_box")):
        link_container = gallery.find("a")
        if link_container:
            matches_links.add(urljoin(constants.main_url, link_container['href']))

    self.gallery_links = list(matches_links)

    if len(self.gallery_links) > 0:
        self.found_by = self.name
        return True
    else:
        return False

def compare_by_title(self, title: str) -> bool:

    request_dict = construct_request_dict(self.settings, self.own_settings)

    r = request_with_retries(
        urljoin(constants.main_url, 'search/') + quote(title),
        request_dict,
    )

    if not r:
        logger.info("Got no response from server")
        return False

    r.encoding = 'utf-8'

    soup_1 = BeautifulSoup(r.text, 'html.parser')

    matches_links = set()

    # content-row manga row
    for gallery in soup_1.find_all("div", class_=re.compile("content-row")):
        link_container = gallery.find("a", class_="content-title")
        if link_container:
            matches_links.add(urljoin(constants.main_url, link_container['href']))

    self.gallery_links = list(matches_links)

    if len(self.gallery_links) > 0:
        self.found_by = self.name
        return True
    else:
        return False

def request_hath_download(self, root: str, gid: str, token: str, key: str) -> Optional[requests.models.Response]:

    url = root + '/archiver.php'
    params = {'gid': gid, 'token': token, 'or': key}

    # logger.info("Requesting hath download to URL: {}".format(url))

    request_dict = construct_request_dict(self.settings, self.own_settings)
    request_dict['params'] = params
    request_dict['data'] = {'hathdl_xres': 'org'}

    for retry_count in range(3):
        try:
            r = requests.post(url, **request_dict)
            return r
        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
            if retry_count < 2:
                logger.warning(
                    "Request failed, retrying {} of {}: {}".format(
                        retry_count + 1, 3, str(e)))
                continue
            else:
                return None
    return None

def get_values_from_gallery_link(self, link: str) -> Optional[GalleryData]:

    request_dict = construct_request_dict(self.settings, self.own_settings)

    response = request_with_retries(
        link,
        request_dict,
        post=False,
    )

    if not response:
        return None

    response.encoding = 'utf-8'

    soup = BeautifulSoup(response.text, 'html.parser')

    if soup:
        title_jpn_match = soup.find("div", id=re.compile("info")).h2

        gallery_id_match = re.search(
            r'{}(\d+)'.format(constants.gallery_container_url), link)

        if not gallery_id_match:
            return None

        gallery_id = 'nh-' + gallery_id_match.group(1)

        gallery = GalleryData(gallery_id, self.name)
        gallery.title = soup.h1.get_text()
        gallery.title_jpn = title_jpn_match.get_text() if title_jpn_match else ''
        gallery_filecount_match = re.search(r'<div>(\d+) page(s*)</div>', response.text)
        if gallery_filecount_match:
            gallery.filecount = int(gallery_filecount_match.group(1))
        else:
            gallery.filecount = 0
        gallery.tags = []
        gallery.link = link
        gallery.posted = dateutil.parser.parse(soup.find("time")['datetime'])

        for tag_container in soup.find_all("a", {"class": "tag"}):
            tag_name = [text for text in tag_container.stripped_strings][0]
            tag_name = tag_name.split(" | ")[0]
            tag_scope = tag_container.parent.parent.get_text()
            tag_ext = tag_container.parent.get_text()
            tag_scope = tag_scope.replace(tag_ext, "").replace(
                "\t", "").replace("\n", "").replace(":", "").lower()

            if tag_scope == 'tags':
                gallery.tags.append(translate_tag(tag_name))
            elif tag_scope == 'categories':
                gallery.category = tag_name.capitalize()
            else:
                gallery.tags.append(translate_tag(tag_scope + ":" + tag_name))
    else:
        return None

    return gallery

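# translate_tag() is not defined in this section; from its usage above it
# appears to normalize scraped tag text into the internal "scope:name" format.
# A hedged sketch of the assumed behavior, not the project's confirmed code:
def translate_tag(tag: str) -> str:
    # Lowercase and replace spaces with underscores (assumption).
    return tag.lower().replace(" ", "_")
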
def get_galleries_from_page_links(self, page_links: Iterable[str], page_links_results: list[DataDict]) -> None:

    api_page_links = []

    for page_link in page_links:

        m = re.search(r'(.+)/s/(\w+)/(\d+)-(\d+)', page_link)
        if not m:
            continue
        api_page_links.append(
            {'data': [m.group(3), m.group(2), m.group(4)]})

    api_page_links_chunks = list(chunks(api_page_links, 25))

    for i, group in enumerate(api_page_links_chunks):

        # Sleep between every third chunk to avoid hammering the API.
        if i % 3 == 2:
            time.sleep(self.own_settings.wait_timer)

        data = {
            'method': 'gtoken',
            'pagelist': [x['data'] for x in group]}

        headers = {'Content-Type': 'application/json'}

        request_dict = construct_request_dict(self.settings, self.own_settings)
        request_dict['headers'] = {**headers, **self.settings.requests_headers}
        request_dict['data'] = json.dumps(data)

        response = request_with_retries(
            constants.ge_api_url,
            request_dict,
            post=True,
        )

        if not response:
            continue
        try:
            response_data = response.json()
        except (ValueError, KeyError):
            logger.error("Could not parse response to JSON: {}".format(response.text))
            continue

        for gid_token_pair in response_data['tokenlist']:

            discard_approved, discard_message = self.discard_gallery_by_internal_checks(
                gallery_id=gid_token_pair['gid'],
                link=link_from_gid_token_fjord(gid_token_pair['gid'], gid_token_pair['token'], False)
            )

            if discard_approved:
                if not self.settings.silent_processing:
                    logger.info(discard_message)
                continue

            page_links_results.append(
                {'data': (gid_token_pair['gid'], gid_token_pair['token']),
                 'link': link_from_gid_token_fjord(gid_token_pair['gid'], gid_token_pair['token'], False)})

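# chunks() is assumed to be the usual slicing generator, splitting a list into
# fixed-size groups (here, 25 page links per API call). Hypothetical sketch:
def chunks(lst, n):
    # Yield successive n-sized slices from lst.
    for i in range(0, len(lst), n):
        yield lst[i:i + n]
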
def compare_by_title(self, gallery_title: str) -> bool:

    api_url = urljoin(self.own_settings.metadata_url, constants.api_path)
    logger.info("Querying URL: {}".format(api_url))

    request_dict = construct_request_dict(self.settings, self.own_settings)
    request_dict['params'] = {'match': True, 'title': gallery_title}

    response = request_with_retries(
        api_url,
        request_dict,
        post=False,
        retries=3
    )

    if not response:
        logger.info("Got no response from server")
        return False

    response_data = response.json()

    matches_links = set()

    if 'error' in response_data:
        logger.info("Got error from server: {}".format(response_data['error']))
        return False

    for gallery in response_data:
        if 'link' in gallery:
            matches_links.add(gallery['link'])
        if 'gallery_container' in gallery and gallery['gallery_container']:
            if self.settings.gallery_model:
                gallery_container = self.settings.gallery_model.objects.filter(
                    gid=gallery['gallery_container'], provider=gallery['provider']
                )
                first_gallery_container = gallery_container.first()
                if first_gallery_container:
                    gallery['gallery_container_gid'] = first_gallery_container.gid
        if 'magazine' in gallery and gallery['magazine']:
            if self.settings.gallery_model:
                magazine = self.settings.gallery_model.objects.filter(
                    gid=gallery['magazine'], provider=gallery['provider']
                )
                first_magazine = magazine.first()
                if first_magazine:
                    gallery['magazine_gid'] = first_magazine.gid
        if 'posted' in gallery:
            if gallery['posted'] != 0:
                gallery['posted'] = datetime.fromtimestamp(int(gallery['posted']), timezone.utc)
            else:
                gallery['posted'] = None
        self.values_array.append(GalleryData(**gallery))

    self.gallery_links = list(matches_links)

    if len(self.gallery_links) > 0:
        self.found_by = self.name
        return True
    else:
        return False

def search_using_xml_api(self, title: str) -> bool:

    if not self.own_settings.api_key:
        logger.error("Can't use {} API without an api key. Check {}/API_MANUAL.txt".format(
            self.name, constants.main_page
        ))
        return False

    page = 1
    galleries = []

    while True:
        link = '{}/api/{}/?S=objectSearch&sn={}&page={}'.format(
            constants.main_page, self.own_settings.api_key, title, page
        )

        request_dict = construct_request_dict(self.settings, self.own_settings)

        response = request_with_retries(
            link,
            request_dict,
            post=False,
        )

        if not response:
            break

        response.encoding = 'utf-8'
        # Based on: https://www.doujinshi.org/API_MANUAL.txt
        api_galleries = convert_api_response_text_to_gallery_dicts(response.text)

        if not api_galleries:
            break
        galleries.extend(api_galleries)

        # The API manual says 25 max results per query, but in practice we get
        # 50. Fewer than 50 results means there are no more pages.
        if len(api_galleries) < 50:
            break
        page += 1

    self.values_array = galleries
    self.gallery_links = [x.link for x in galleries if x.link]

    if len(self.gallery_links) > 0:
        self.found_by = self.name
        return True
    else:
        return False

def compare_by_image(self, zip_path: str, only_cover: bool) -> bool:

    if os.path.splitext(zip_path)[1] != '.zip':
        self.gallery_links = []
        return False

    try:
        my_zip = zipfile.ZipFile(zip_path, 'r')
    except (zipfile.BadZipFile, NotImplementedError):
        self.gallery_links = []
        return False

    filtered_files = get_images_from_zip(my_zip)

    if not filtered_files:
        self.gallery_links = []
        return False

    first_file = filtered_files[0]

    if first_file[1] is None:
        with my_zip.open(first_file[0]) as current_img:
            first_file_sha1 = sha1_from_file_object(current_img)
    else:
        # The first image lives inside a nested zip.
        with my_zip.open(first_file[1]) as current_zip:
            with zipfile.ZipFile(current_zip) as my_nested_zip:
                with my_nested_zip.open(first_file[0]) as current_img:
                    first_file_sha1 = sha1_from_file_object(current_img)

    payload = {
        'f_shash': first_file_sha1,
        'fs_from': os.path.basename(first_file[0]),
        'fs_covers': 1 if only_cover else 0,
        'fs_similar': 0
    }

    request_dict = construct_request_dict(self.settings, self.own_settings)
    request_dict['params'] = payload

    r = requests.get(
        constants.ex_page,
        **request_dict
    )

    my_zip.close()

    parser = SearchHTMLParser()
    parser.feed(r.text)

    self.gallery_links = list(parser.galleries)

    if len(self.gallery_links) > 0:
        self.found_by = self.name
        return True
    else:
        return False

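# sha1_from_file_object() is assumed to hash a file-like object in chunks, so
# large cover images never need to fit in memory. A minimal sketch (the real
# helper may differ):
import hashlib

def sha1_from_file_object(fo, chunk_size=8192):
    digest = hashlib.sha1()
    for chunk in iter(lambda: fo.read(chunk_size), b''):
        digest.update(chunk)
    return digest.hexdigest()
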
def start_download(self) -> None:

    if not self.gallery or not self.gallery.link:
        return

    logger.info(
        "Downloading an archive from a generic HTTP server: {}".format(
            self.gallery.link))

    request_dict = construct_request_dict(self.settings, self.own_settings)

    request_file = requests.get(self.gallery.link, stream=True, **request_dict)

    filename = get_filename_from_cd(
        request_file.headers.get('content-disposition'))

    if not filename:
        # Fall back to the last path segment of the URL.
        if self.gallery.link.find('/') != -1:
            filename = self.gallery.link.rsplit('/', 1)[1]

    if not filename:
        logger.error("Could not find a filename for link: {}".format(
            self.gallery.link))
        self.return_code = 0
        return

    self.gallery.title = filename.replace(".zip", "")
    self.gallery.filename = replace_illegal_name(
        available_filename(
            self.settings.MEDIA_ROOT,
            os.path.join(self.own_settings.archive_dl_folder, filename)))

    filepath = os.path.join(self.settings.MEDIA_ROOT, self.gallery.filename)
    with open(filepath, 'wb') as fo:
        for chunk in request_file.iter_content(4096):
            fo.write(chunk)

    self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(filepath)

    if self.gallery.filesize > 0:
        self.crc32 = calc_crc32(filepath)
        self.fileDownloaded = 1
        self.return_code = 1
    else:
        logger.error("Could not download archive")
        self.return_code = 0

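# get_filename_from_cd() presumably extracts a filename from a
# Content-Disposition header. A common minimal implementation, offered as an
# assumption rather than the project's actual code:
import re

def get_filename_from_cd(cd):
    if not cd:
        return None
    names = re.findall('filename=(.+)', cd)
    if not names:
        return None
    return names[0].strip().strip('"')
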
def get_galleries_from_xml(self, url_group: Iterable[str]) -> list[GalleryData]:

    possible_gallery_ids = [
        self.id_from_url(gallery_url) for gallery_url in url_group
    ]

    galleries_ids = [
        gallery_id.replace('mugi-B', 'B') for gallery_id in possible_gallery_ids
        if gallery_id
    ]

    galleries = []

    gallery_chunks = list(chunks(galleries_ids, 25))

    for i, group in enumerate(gallery_chunks):
        logger.info(
            "Calling API ({}). Gallery group: {}, galleries in group: {}, total groups: {}".format(
                self.name, i + 1, len(group), len(gallery_chunks)))

        # API doesn't say anything about needing to wait between requests, but we wait just in case.
        if i > 0:
            time.sleep(self.own_settings.wait_timer)

        # Note: query only the current group, not the full id list.
        link = constants.main_page + '/api/' + self.own_settings.api_key + '/?S=getID&ID=' + ",".join(group)

        request_dict = construct_request_dict(self.settings, self.own_settings)

        response = request_with_retries(
            link,
            request_dict,
            post=False,
        )

        if not response:
            continue

        response.encoding = 'utf-8'
        api_galleries = convert_api_response_text_to_gallery_dicts(response.text)

        if not api_galleries:
            continue
        galleries.extend(api_galleries)

    return galleries

def get_values_from_gallery_link(self, link: str) -> Optional[GalleryData]:

    request_dict = construct_request_dict(self.settings, self.own_settings)

    response = request_with_retries(
        link,
        request_dict,
        post=False,
    )

    if not response:
        return None

    response.encoding = 'utf-8'

    return self.process_regular_gallery_page(link, response.text)

def start_download(self) -> None:

    if not self.gallery or not self.gallery.temp_archive:
        return

    logger.info(
        "Downloading an archive: {} from a Panda Backup-like source: {}".format(
            self.gallery.title, self.gallery.temp_archive['link']))

    to_use_filename = get_base_filename_string_from_gallery_data(self.gallery)

    to_use_filename = replace_illegal_name(to_use_filename)

    self.gallery.filename = available_filename(
        self.settings.MEDIA_ROOT,
        os.path.join(self.own_settings.archive_dl_folder,
                     to_use_filename + '.zip'))  # TODO: File could be cbz.

    request_dict = construct_request_dict(self.settings, self.own_settings)
    request_dict['stream'] = True

    request_file = request_with_retries(
        self.gallery.temp_archive['link'],
        request_dict,
    )

    if not request_file:
        logger.error("Could not download archive")
        self.return_code = 0
        return

    filepath = os.path.join(self.settings.MEDIA_ROOT, self.gallery.filename)
    with open(filepath, 'wb') as fo:
        for chunk in request_file.iter_content(4096):
            fo.write(chunk)

    self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(filepath)

    if self.gallery.filesize > 0:
        self.crc32 = calc_crc32(filepath)
        self.fileDownloaded = 1
        self.return_code = 1
    else:
        logger.error("Could not download archive")
        self.return_code = 0

def request_torrent_download(self, root: str, gid: str, token: str) -> Optional[requests.models.Response]:

    url = root + '/gallerytorrents.php'
    params = {'gid': gid, 't': token}

    request_dict = construct_request_dict(self.settings, self.own_settings)
    request_dict['params'] = params

    response = request_with_retries(
        url,
        request_dict,
        post=True,
    )

    return response

def get_values_from_gallery_link(self, link: str) -> Optional[GalleryData]:

    link_root, gid, token = root_gid_token_from_link(link)

    if link_root is None or gid is None or token is None:
        return None

    if self.own_settings.use_ex_for_fjord and self.own_settings.cookies and link_root == constants.ex_api_url:
        api_page = constants.ex_api_url
    else:
        api_page = constants.ge_api_url

    data = utilities.request_data_from_gid_token_iterable([(gid, token)])

    headers = {'Content-Type': 'application/json'}

    request_dict = construct_request_dict(self.settings, self.own_settings)
    request_dict['headers'] = {**headers, **self.settings.requests_headers}
    request_dict['data'] = json.dumps(data)

    response = request_with_retries(
        api_page,
        request_dict,
        post=True,
    )

    if not response:
        return None

    try:
        response_data = response.json()
    except (ValueError, KeyError):
        logger.error("Could not parse response to JSON: {}".format(response.text))
        return None

    for gallery_data in response_data['gmetadata']:
        if 'error' in gallery_data:
            logger.error(
                "Adding gallery {}: "
                "failed with error: {}".format(gallery_data['gid'], gallery_data['error'])
            )
            return None
        internal_gallery_data = map_external_gallery_data_to_internal(gallery_data)
        return internal_gallery_data
    return None

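# root_gid_token_from_link() is assumed to split an e-hentai/exhentai-style
# gallery URL into (root, gid, token), mirroring the /g/<gid>/<token> pattern
# matched elsewhere in this section. Hypothetical sketch:
import re

def root_gid_token_from_link(link):
    m = re.search(r'(.+)/g/(\d+)/(\w+)', link)
    if m:
        return m.group(1), m.group(2), m.group(3)
    return None, None, None
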
def request_archive_download(self, root: str, gid: str, token: str, key: str) -> Optional[requests.models.Response]:

    url = root + '/archiver.php'
    params = {'gid': gid, 'token': token, 'or': key}

    request_dict = construct_request_dict(self.settings, self.own_settings)
    request_dict['params'] = params
    request_dict['data'] = constants.archive_download_data

    response = request_with_retries(
        url,
        request_dict,
        post=True,
    )

    return response

def compare_by_title_json(self, title: str) -> bool:
    # https://www.fakku.net/suggest/return%20of%20me
    headers = {
        'Content-Type': 'application/json',
        'Referer': constants.main_url + '/',
        'X-Requested-With': 'XMLHttpRequest',
    }

    logger.info("Querying URL: {}".format(urljoin(constants.main_url, 'suggest/') + quote(title.lower())))

    request_dict = construct_request_dict(self.settings, self.own_settings)
    request_dict['headers'] = {**headers, **self.settings.requests_headers}

    response = request_with_retries(
        urljoin(constants.main_url, 'suggest/') + quote(title.lower()),
        request_dict,
        post=False,
        retries=3
    )

    if not response:
        logger.info("Got no response from server")
        return False

    response_data = response.json()

    matches_links = set()

    if 'error' in response_data:
        logger.info("Got error from server: {}".format(response_data['error']))
        return False

    for gallery in response_data:
        if gallery['type'] in ('doujinshi', 'manga', 'hentai', 'magazine'):
            matches_links.add(urljoin(constants.main_url, gallery['link']))

    self.gallery_links = list(matches_links)

    if len(self.gallery_links) > 0:
        self.found_by = self.name
        return True
    else:
        return False

def compare_by_title_google(self, title: str) -> bool:

    payload = {'q': 'site:e-hentai.org ' + title}

    request_dict = construct_request_dict(self.settings, self.own_settings)
    request_dict['params'] = payload

    r = requests.get(
        "https://www.google.com/search",
        **request_dict
    )

    matches_links = set()

    m = re.finditer(r'(ex|g\.e-|e-)hentai\.org/g/(\d+)/(\w+)', r.text)

    for match in m:
        matches_links.add(
            self.get_final_link_from_link(
                link_from_gid_token_fjord(match.group(2), match.group(3), False)
            )
        )

    m2 = re.finditer(
        r'(ex|g\.e-|e-)hentai\.org/gallerytorrents\.php\?gid=(\d+)&t=(\w+)/', r.text)

    for match in m2:
        matches_links.add(
            self.get_final_link_from_link(
                link_from_gid_token_fjord(match.group(2), match.group(3), False)
            )
        )

    self.gallery_links = list(matches_links)

    if len(self.gallery_links) > 0:
        self.found_by = self.name
        return True
    else:
        return False

def compare_by_title(self, title: str) -> bool:

    request_dict = construct_request_dict(self.settings, self.own_settings)
    request_dict['params'] = {'q': title}

    r = request_with_retries(
        urljoin(constants.main_url, 'search/'),
        request_dict,
    )

    if not r:
        logger.info("Got no response from server")
        return False

    r.encoding = 'utf-8'

    soup_1 = BeautifulSoup(r.text, 'html.parser')

    matches_links = set()

    discard_titles = ["(SPANISH)", "(FRENCH)"]

    for link_container in soup_1.find_all("a", class_=re.compile("product-card")):
        # Discard spanish/french releases
        title_container = link_container.find("div", class_="product-card__name")
        if not title_container:
            continue
        if any(title_container.get_text().startswith(x) for x in discard_titles):
            continue
        matches_links.add(
            urljoin(
                constants.main_url,
                urljoin(link_container['href'], urlparse(link_container['href']).path)))

    self.gallery_links = list(matches_links)

    if len(self.gallery_links) > 0:
        self.found_by = self.name
        return True
    else:
        return False

def crawl_feed(self, feed_url: str = '') -> list[ChaikaGalleryData]:

    request_dict = construct_request_dict(self.settings, self.own_settings)

    response = request_with_retries(
        feed_url,
        request_dict,
        post=False,
    )

    dict_list = []

    if not response:
        return []

    try:
        json_decoded = response.json()
    except (ValueError, KeyError):
        logger.error("Could not parse response to JSON: {}".format(response.text))
        return []

    if isinstance(json_decoded, dict):
        if 'galleries' in json_decoded:
            dict_list = json_decoded['galleries']
        else:
            dict_list.append(json_decoded)
    elif isinstance(json_decoded, list):
        dict_list = json_decoded

    total_galleries_filtered: list[ChaikaGalleryData] = []

    for gallery in dict_list:
        if 'result' in gallery:
            continue
        gallery['posted'] = datetime.fromtimestamp(int(gallery['posted']), timezone.utc)
        gallery_data = ChaikaGalleryData(**gallery)
        total_galleries_filtered.append(gallery_data)

    return total_galleries_filtered

def get_values_from_gallery_link(self, link: str) -> Optional[GalleryData]:

    request_dict = construct_request_dict(self.settings, self.own_settings)

    response = request_with_retries(
        link,
        request_dict,
        post=False,
    )

    if not response:
        return None

    response.encoding = 'utf-8'

    # Remove a stray closing </b> after the page-count div that breaks the parser.
    new_text = re.sub(r'(<div class="right">\d+?)</b>', r'\1', response.text)

    if constants.main_url + '/magazines/' in link:
        return self.process_magazine_page(link, new_text)
    else:
        return self.process_regular_gallery_page(link, new_text)

def get_final_link_from_link(self, link: str) -> str:

    time.sleep(self.own_settings.wait_timer)

    gallery_gid, gallery_token = get_gid_token_from_link(link)
    gallery_link = link_from_gid_token_fjord(gallery_gid, gallery_token, True)

    request_dict = construct_request_dict(self.settings, self.own_settings)

    gallery_page_text = requests.get(
        gallery_link,
        **request_dict
    ).text

    if 'Gallery Not Available' in gallery_page_text:
        return gallery_link
    else:
        gallery_parser = GalleryHTMLParser()
        gallery_parser.feed(gallery_page_text)
        # Recurse until we reach the final (non-replaced) gallery.
        if gallery_parser.found_non_final_gallery == 2 and gallery_parser.non_final_gallery:
            return self.get_final_link_from_link(gallery_parser.non_final_gallery)
    return gallery_link

def start_download(self) -> None:

    if not self.gallery or not self.gallery.link or not self.gallery.archiver_key:
        return

    to_use_filename = get_base_filename_string_from_gallery_data(self.gallery)

    to_use_filename = replace_illegal_name(to_use_filename)

    self.gallery.filename = available_filename(
        self.settings.MEDIA_ROOT,
        os.path.join(self.own_settings.archive_dl_folder,
                     to_use_filename + '.zip'))

    request_dict = construct_request_dict(self.settings, self.own_settings)

    request_file = requests.get(self.gallery.archiver_key, stream=True, **request_dict)

    filepath = os.path.join(self.settings.MEDIA_ROOT, self.gallery.filename)
    with open(filepath, 'wb') as fo:
        for chunk in request_file.iter_content(4096):
            fo.write(chunk)

    self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(filepath)

    if self.gallery.filesize > 0:
        self.crc32 = calc_crc32(filepath)
        self.fileDownloaded = 1
        self.return_code = 1
    else:
        logger.error("Could not download archive")
        os.remove(filepath)
        self.return_code = 0

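# calc_crc32() is assumed to compute the downloaded file's CRC32 in streaming
# fashion, roughly like this sketch (assumption; the return format may differ):
import zlib

def calc_crc32(filepath, chunk_size=65536):
    crc = 0
    with open(filepath, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            crc = zlib.crc32(chunk, crc)
    return "{:08x}".format(crc & 0xFFFFFFFF)
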
def compare_by_title(self, image_title: str) -> bool:

    filters = {'f_search': '"' + image_title + '"'}

    request_dict = construct_request_dict(self.settings, self.own_settings)
    request_dict['params'] = filters

    r = requests.get(
        constants.ex_page,
        **request_dict
    )

    parser = SearchHTMLParser()
    parser.feed(r.text)

    self.gallery_links = list(parser.galleries)

    if len(self.gallery_links) > 0:
        self.found_by = self.name
        return True
    else:
        return False

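# SearchHTMLParser is assumed to collect gallery URLs from a search results
# page. A hedged sketch built on html.parser; the real class likely matches
# more of the page markup:
from html.parser import HTMLParser

class SearchHTMLParser(HTMLParser):

    def __init__(self):
        super().__init__()
        self.galleries: set = set()

    def handle_starttag(self, tag, attrs):
        if tag != 'a':
            return
        for name, value in attrs:
            # Gallery links follow the /g/<gid>/<token> pattern (assumption).
            if name == 'href' and value and '/g/' in value:
                self.galleries.add(value)
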
def parse_posted_date_from_feed(self, link: str, gid: str) -> Optional[datetime]:

    request_dict = construct_request_dict(self.settings, self.own_settings)

    response = request_with_retries(
        link,
        request_dict,
        post=False,
    )

    if not response:
        return None

    response.encoding = 'utf-8'

    feed = feedparser.parse(response.text)

    for item in feed['items']:
        if gid in item['id']:
            return date_parser.parse(
                item['published'],
                tzinfos=constants.extra_feed_url_timezone)
    return None

def compare_by_title(self, title: str) -> bool:

    headers = {'Content-Type': 'application/json'}

    api_link = constants.posts_api_url

    payload = {'search': title}

    request_dict = construct_request_dict(self.settings, self.own_settings)
    request_dict['headers'] = {**headers, **self.settings.requests_headers}
    request_dict['params'] = payload

    response = request_with_retries(
        api_link,
        request_dict,
        post=False,
    )

    if not response:
        return False

    response.encoding = 'utf-8'

    try:
        response_data = response.json()
    except (ValueError, KeyError):
        return False

    matches_links = set()

    for gallery in response_data:
        matches_links.add(gallery['link'])

    self.gallery_links = list(matches_links)

    if len(self.gallery_links) > 0:
        self.found_by = self.name
        return True
    else:
        return False

def compare_by_title(self, title: str) -> bool:

    request_dict = construct_request_dict(self.settings, self.own_settings)
    request_dict['params'] = {'q': title}

    r = requests.get("{}/search/".format(constants.main_page), **request_dict)

    m = re.finditer(r'a href="/g/(\d+)/"', r.text)

    matches_links = set()

    for match in m:
        matches_links.add("{}{}".format(
            constants.gallery_container_url, match.group(1)))

    self.gallery_links = list(matches_links)

    if len(self.gallery_links) > 0:
        self.found_by = self.name
        return True
    else:
        return False

def start_download(self) -> None:

    if not self.gallery:
        return

    to_use_filename = get_base_filename_string_from_gallery_data(self.gallery)

    to_use_filename = replace_illegal_name(to_use_filename)

    self.gallery.filename = available_filename(
        self.settings.MEDIA_ROOT,
        os.path.join(self.own_settings.archive_dl_folder,
                     to_use_filename + '.zip'))

    if not (self.gallery.root and self.gallery.gid and self.gallery.token and self.gallery.archiver_key):
        logger.error(
            'Missing required data -> root: {}, gid: {}, token: {}, archiver_key: {}.'.format(
                self.gallery.root,
                self.gallery.gid,
                self.gallery.token,
                self.gallery.archiver_key,
            ))
        self.return_code = 0
        return

    r = self.request_archive_download(
        self.gallery.root,
        self.gallery.gid,
        self.gallery.token,
        self.gallery.archiver_key)

    if not r:
        logger.error('Could not get download link.')
        self.return_code = 0
        return

    r.encoding = 'utf-8'

    if 'Invalid archiver key' in r.text:
        logger.error("Invalid archiver key received.")
        self.return_code = 0
    else:
        archive_link = get_archive_link_from_html_page(r.text)
        if archive_link == '':
            logger.error(
                'Could not find archive link, page text: {}'.format(r.text))
            self.return_code = 0
        else:
            # Strip any query string from the archive link.
            m = re.match(r"(.*?)(\?.*?)", archive_link)
            if m:
                archive_link = m.group(1)

            logger.info('Got link: {}, from url: {}'.format(archive_link, r.url))

            request_dict = construct_request_dict(self.settings, self.own_settings)

            request_file = requests.get(archive_link + '?start=1', stream=True, **request_dict)

            if request_file.status_code == 200:
                logger.info('Downloading gallery: {}.zip'.format(to_use_filename))
                filepath = os.path.join(self.settings.MEDIA_ROOT, self.gallery.filename)
                with open(filepath, 'wb') as fo:
                    for chunk in request_file.iter_content(4096):
                        fo.write(chunk)

                self.gallery.filesize, self.gallery.filecount = get_zip_fileinfo(filepath)

                if self.gallery.filesize > 0:
                    self.crc32 = calc_crc32(filepath)
                    self.fileDownloaded = 1
                    self.return_code = 1
            else:
                logger.error("Could not download archive")
                self.return_code = 0

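# get_archive_link_from_html_page() is assumed to pull the download URL out of
# the archiver response page, returning '' when nothing matches (as the caller
# above expects). A hypothetical regex-based sketch:
import re

def get_archive_link_from_html_page(page_text: str) -> str:
    m = re.search(r'document\.location\s*=\s*"(.+?)"', page_text)
    return m.group(1) if m else ''
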