def get_original_url(self, image_id):
    """Fetch the post page for *image_id* and queue its original image URL.

    Appends ``{"referer": post_url, "image_url": fixed_url}`` to
    ``self.target_list``.  When the server serves an advertisement page
    instead, waits 5 seconds and retries.  Errors are reported via
    ``self.ui`` rather than raised.
    """
    url = self.POST_URL % {"image_id": image_id}
    req = urllib.request.Request(url)
    try:
        result_page_raw = get_url_opener(self.ui).open(req).read()
        result_page = result_page_raw.decode('utf-8')
        if re.findall(self.REGEX_AD, result_page):
            self.ui.updateStatus(
                "Advertisement found, retry after 5 sec...")
            gevent.sleep(5)
            # Bound method already binds self; the original passed self
            # again, shifting image_id into the wrong parameter.
            return self.get_original_url(image_id)
        try:
            original_url_list = re.findall(self.REGEX_RESIZE_ORIGINAL_URL,
                                           result_page)
            if not original_url_list:
                # Fall back to the non-resized original-URL pattern.
                original_url_list = re.findall(self.REGEX_ORIGINAL_URL,
                                               result_page)
            original_url = original_url_list[0]
            # Force the configured scheme onto the scraped URL (pages may
            # emit scheme-relative or http links).
            parse_result = urllib.parse.urlparse(original_url)
            unparse_args = (self.URL_SCHEME, *parse_result[1:])
            fixed_original_url = urllib.parse.urlunparse(unparse_args)
            target = dict()
            target["referer"] = url
            target["image_url"] = fixed_original_url
            self.target_list.append(target)
        except IndexError:
            self.ui.updateError(
                "Error: Cannot find original image URL of %s" % url)
    except urllib.error.URLError as e:
        self.ui.updateError(
            "Error while fetching original image URL from %s: %s" % (url, e))
def get_original_url(self, image_id):
    """Fetch the post page for *image_id* and queue its original image URL.

    Appends ``{"referer": post_url, "image_url": url}`` to
    ``self.target_list``; retries after 5 seconds when an advertisement
    page is served.  Errors are reported via ``self.ui``, not raised.
    """
    url = self.POST_URL % {"image_id": image_id}
    req = urllib2.Request(url)
    try:
        result_page = get_url_opener(self.ui).open(req).read()
        if re.findall(self.REGEX_AD, result_page):
            self.ui.updateStatus(
                "Advertisement found, retry after 5 sec...")
            gevent.sleep(5)
            # Bound method already binds self; the original passed self
            # again, shifting image_id into the wrong parameter.
            return self.get_original_url(image_id)
        try:
            original_url = re.findall(self.REGEX_RESIZE_ORIGINAL_URL,
                                      result_page)
            if not original_url:
                # Fall back to the non-resized original-URL pattern.
                original_url = re.findall(self.REGEX_ORIGINAL_URL,
                                          result_page)
            target = dict()
            target["referer"] = url
            target["image_url"] = original_url[0]
            self.target_list.append(target)
        except IndexError:
            self.ui.updateError(
                "Error: Cannot find original image URL of %s" % url)
    # 'as' form works on Python 2.6+ and is required on Python 3; the
    # original 'except X, e' comma syntax is a SyntaxError on Python 3.
    except urllib2.URLError as e:
        self.ui.updateError(
            "Error while fetching original image URL from %s: %s" % (url, e))
def get_original_url(self, image_id):
    """Resolve the full-size image URL for *image_id* and queue it.

    On success appends ``{"referer": post_url, "image_url": fixed_url}``
    to ``self.target_list``; retries after 5 s on an ad interstitial.
    Errors are reported through ``self.ui`` instead of being raised.
    """
    url = self.POST_URL % {"image_id": image_id}
    req = urllib.request.Request(url)
    try:
        result_page_raw = get_url_opener(self.ui).open(req).read()
        result_page = result_page_raw.decode('utf-8')
        if re.findall(self.REGEX_AD, result_page):
            self.ui.updateStatus("Advertisement found, retry after 5 sec...")
            gevent.sleep(5)
            # Bound method already binds self; the original passed self
            # again, shifting image_id into the wrong parameter.
            return self.get_original_url(image_id)
        try:
            original_url_list = re.findall(self.REGEX_RESIZE_ORIGINAL_URL, result_page)
            if not original_url_list:
                # Fall back to the non-resized original-URL pattern.
                original_url_list = re.findall(self.REGEX_ORIGINAL_URL, result_page)
            original_url = original_url_list[0]
            # Force the configured scheme onto the scraped URL.
            parse_result = urllib.parse.urlparse(original_url)
            unparse_args = (self.URL_SCHEME, *parse_result[1:])
            fixed_original_url = urllib.parse.urlunparse(unparse_args)
            target = dict()
            target["referer"] = url
            target["image_url"] = fixed_original_url
            self.target_list.append(target)
        except IndexError:
            self.ui.updateError("Error: Cannot find original image URL of %s" % url)
    except urllib.error.URLError as e:
        self.ui.updateError("Error while fetching original image URL from %s: %s" % (url, e))
def get_list_with_page(self, page=0):
    """Return the list of post IDs found on result *page* (0-based).

    Retries after 5 seconds when an advertisement page is served.
    Returns an empty list on connection failure (reported via self.ui).
    """
    url = self.LIST_URL % {"page_index": page * self.IMAGE_PER_PAGE,
                           "tags": self.tags}
    req = urllib2.Request(url)
    try:
        result_page = get_url_opener(self.ui).open(req).read()
        if re.findall(self.REGEX_AD, result_page):
            self.ui.updateStatus("Advertisement found, retry after 5 sec...")
            gevent.sleep(5)
            # Bound method already binds self; the original passed self
            # again, shifting page into the wrong parameter.
            return self.get_list_with_page(page)
        partial_list = re.findall(self.REGEX_POST_ID, result_page)
        # page is 0-based internally; report it 1-based to the user.
        self.ui.updateStatus("Found %d images on page %d" % (len(partial_list), page + 1))
        return partial_list
    except urllib2.URLError:
        self.ui.updateError("Cannot connect to server. Maybe bad internet connection?")
        return list()
def get_image(self, target, total_count):
    """Download one image described by *target* into ``self.fullpath``.

    target -- dict with "referer" (post page URL) and "image_url" keys.
    total_count -- total number of images, used for progress reporting.

    Skips files that already exist unless the UI's overwrite option is
    checked.  Respawns itself via the pool on HTTP 503; other errors are
    reported through ``self.ui``.
    """
    image_referer = target["referer"]
    image_url = target["image_url"]
    fname = image_url.split("/")[-1]
    if os.path.exists(os.path.join(self.fullpath, fname)) \
            and (not self.ui.overwriteFile.IsChecked()):
        # Skip files the user already has when overwrite is disabled.
        self.downloaded += 1
        self.ui.updateStatus("Progress %s/%s (%.2f %%) - SKIP! (Already downloaded)" % (
            self.downloaded, total_count,
            self.downloaded * 100.0 / total_count))
        return
    req = urllib.request.Request(image_url)
    # Some hosts refuse requests without the post page as referer.
    req.add_header("referer", image_referer)
    try:
        response = get_url_opener(self.ui).open(req)
        # Buffer the whole image first so a partial download never
        # leaves a truncated file on disk.
        img_file_buffer = BytesIO()
        while True:
            chunk = response.read(16384)
            if not chunk:
                break
            img_file_buffer.write(chunk)
            self.total_rx_bytes += len(chunk)
        # 'with' guarantees the handle is closed even if write() fails
        # (the original leaked the handle on error).
        with open(os.path.join(self.fullpath, fname), "wb") as fp:
            fp.write(img_file_buffer.getvalue())
        self.downloaded += 1
        self.ui.updateStatus("Progress: %s/%s (%.2f %%)" % (
            self.downloaded, total_count,
            self.downloaded * 100.0 / total_count)
        )
    except urllib.error.HTTPError as ue:
        if ue.code == 503:
            # Temporarily Unavailable: retry. The original respawned with
            # image_url (a str) where this method expects the target dict,
            # which would crash on target["referer"].
            self.pool.spawn(self.get_image, target, total_count)
        else:
            self.ui.updateError("Error: %s" % ue)
    except Exception as e:
        self.ui.updateError("Error: %s, %s" % (e, image_referer))
def get_original_url(self, image_id):
    """Resolve the full-size image URL for *image_id* and queue it.

    Appends ``{"referer": post_url, "image_url": url}`` to
    ``self.target_list``; retries after 5 s on an ad interstitial.
    Errors are reported through ``self.ui`` instead of being raised.
    """
    url = self.POST_URL % {"image_id": image_id}
    req = urllib2.Request(url)
    try:
        result_page = get_url_opener(self.ui).open(req).read()
        if re.findall(self.REGEX_AD, result_page):
            self.ui.updateStatus("Advertisement found, retry after 5 sec...")
            gevent.sleep(5)
            # Bound method already binds self; the original passed self
            # again, shifting image_id into the wrong parameter.
            return self.get_original_url(image_id)
        try:
            original_url = re.findall(self.REGEX_RESIZE_ORIGINAL_URL, result_page)
            if not original_url:
                # Fall back to the non-resized original-URL pattern.
                original_url = re.findall(self.REGEX_ORIGINAL_URL, result_page)
            target = dict()
            target["referer"] = url
            target["image_url"] = original_url[0]
            self.target_list.append(target)
        except IndexError:
            self.ui.updateError("Error: Cannot find original image URL of %s" % url)
    # 'as' form works on Python 2.6+ and is required on Python 3; the
    # original 'except X, e' comma syntax is a SyntaxError on Python 3.
    except urllib2.URLError as e:
        self.ui.updateError("Error while fetching original image URL from %s: %s" % (url, e))
def get_list_with_page(self, page=0):
    """Return the list of post IDs found on result *page* (0-based).

    Retries after 5 seconds when an advertisement page is served.
    Returns an empty list on connection failure (reported via self.ui).
    """
    url = self.LIST_URL % {
        "page_index": page * self.IMAGE_PER_PAGE,
        "tags": self.tags
    }
    req = urllib2.Request(url)
    try:
        result_page = get_url_opener(self.ui).open(req).read()
        if re.findall(self.REGEX_AD, result_page):
            self.ui.updateStatus(
                "Advertisement found, retry after 5 sec...")
            gevent.sleep(5)
            # Bound method already binds self; the original passed self
            # again, shifting page into the wrong parameter.
            return self.get_list_with_page(page)
        partial_list = re.findall(self.REGEX_POST_ID, result_page)
        # page is 0-based internally; report it 1-based to the user.
        self.ui.updateStatus("Found %d images on page %d" %
                             (len(partial_list), page + 1))
        return partial_list
    except urllib2.URLError:
        self.ui.updateError(
            "Cannot connect to server. Maybe bad internet connection?")
        return list()