Exemple #1
0
    def get_original_url(self, image_id):
        """Find the original image URL of a post and queue it for download.

        Fetches the post page built from ``self.POST_URL``, extracts the
        image URL (trying ``REGEX_RESIZE_ORIGINAL_URL`` first, then
        ``REGEX_ORIGINAL_URL``), rewrites its scheme to ``self.URL_SCHEME``
        and appends ``{"referer": ..., "image_url": ...}`` to
        ``self.target_list``.

        If the page matches ``self.REGEX_AD`` the request is retried after
        a 5 second pause. Failures are reported through
        ``self.ui.updateError``; nothing is raised for ``URLError`` or for
        a page without a recognizable image URL.

        :param image_id: value substituted for ``image_id`` in ``POST_URL``.
        """
        url = self.POST_URL % {"image_id": image_id}
        req = urllib.request.Request(url)
        try:
            result_page_raw = get_url_opener(self.ui).open(req).read()
        except urllib.error.URLError as e:
            self.ui.updateError(
                "Error while fetching original image URL from %s: %s" %
                (url, e))
            return None
        result_page = result_page_raw.decode('utf-8')

        if re.findall(self.REGEX_AD, result_page):
            self.ui.updateStatus(
                "Advertisement found, retry after 5 sec...")
            gevent.sleep(5)
            # Bound-method call: ``self`` must not be passed explicitly,
            # otherwise the retry fails with a TypeError.
            return self.get_original_url(image_id)

        original_url_list = (
            re.findall(self.REGEX_RESIZE_ORIGINAL_URL, result_page)
            or re.findall(self.REGEX_ORIGINAL_URL, result_page))
        if not original_url_list:
            self.ui.updateError(
                "Error: Cannot find original image URL of %s" % url)
            return None

        # Keep everything but the scheme, which is forced to URL_SCHEME.
        parse_result = urllib.parse.urlparse(original_url_list[0])
        fixed_original_url = urllib.parse.urlunparse(
            (self.URL_SCHEME, *parse_result[1:]))

        self.target_list.append({
            "referer": url,
            "image_url": fixed_original_url,
        })
        return None
Exemple #2
0
 def get_original_url(self, image_id):
     """Find the original image URL of a post and queue it for download.

     Fetches the post page built from ``self.POST_URL``, extracts the
     image URL (trying ``REGEX_RESIZE_ORIGINAL_URL`` first, then
     ``REGEX_ORIGINAL_URL``) and appends
     ``{"referer": ..., "image_url": ...}`` to ``self.target_list``.

     If the page matches ``self.REGEX_AD`` the request is retried after a
     5 second pause. Failures are reported through ``self.ui.updateError``.

     :param image_id: value substituted for ``image_id`` in ``POST_URL``.
     """
     url = self.POST_URL % {"image_id": image_id}
     req = urllib2.Request(url)
     try:
         result_page = get_url_opener(self.ui).open(req).read()
     except urllib2.URLError as e:
         self.ui.updateError(
             "Error while fetching original image URL from %s: %s" %
             (url, e))
         return None

     if re.findall(self.REGEX_AD, result_page):
         self.ui.updateStatus(
             "Advertisement found, retry after 5 sec...")
         gevent.sleep(5)
         # Bound-method call: ``self`` must not be passed explicitly,
         # otherwise the retry fails with a TypeError.
         return self.get_original_url(image_id)

     original_url = (
         re.findall(self.REGEX_RESIZE_ORIGINAL_URL, result_page)
         or re.findall(self.REGEX_ORIGINAL_URL, result_page))
     if not original_url:
         self.ui.updateError(
             "Error: Cannot find original image URL of %s" % url)
         return None

     self.target_list.append({
         "referer": url,
         "image_url": original_url[0],
     })
     return None
    def get_original_url(self, image_id):
        """Look up a post's original image URL and add it to the targets.

        The post page (``self.POST_URL`` formatted with ``image_id``) is
        downloaded and decoded as UTF-8. The image URL is taken from the
        first match of ``REGEX_RESIZE_ORIGINAL_URL`` or, when that finds
        nothing, ``REGEX_ORIGINAL_URL``. Its scheme is replaced by
        ``self.URL_SCHEME`` and the result is appended to
        ``self.target_list`` together with the post URL as referer.

        A page matching ``self.REGEX_AD`` triggers a retry after 5 seconds.
        ``URLError`` and a missing image URL are reported via
        ``self.ui.updateError`` instead of being raised.

        :param image_id: value substituted for ``image_id`` in ``POST_URL``.
        """
        url = self.POST_URL % {"image_id": image_id}
        req = urllib.request.Request(url)
        try:
            result_page_raw = get_url_opener(self.ui).open(req).read()
        except urllib.error.URLError as e:
            self.ui.updateError("Error while fetching original image URL from %s: %s" % (url, e))
            return None
        result_page = result_page_raw.decode('utf-8')

        if re.findall(self.REGEX_AD, result_page):
            self.ui.updateStatus("Advertisement found, retry after 5 sec...")
            gevent.sleep(5)
            # Call through the bound method; passing ``self`` again would
            # raise a TypeError on every retry.
            return self.get_original_url(image_id)

        original_url_list = re.findall(self.REGEX_RESIZE_ORIGINAL_URL, result_page)
        if not original_url_list:
            original_url_list = re.findall(self.REGEX_ORIGINAL_URL, result_page)
        if not original_url_list:
            self.ui.updateError("Error: Cannot find original image URL of %s" % url)
            return None

        # Swap only the scheme component; netloc, path, etc. are kept.
        parse_result = urllib.parse.urlparse(original_url_list[0])
        unparse_args = (self.URL_SCHEME, *parse_result[1:])
        fixed_original_url = urllib.parse.urlunparse(unparse_args)

        self.target_list.append({"referer": url, "image_url": fixed_original_url})
        return None
Exemple #4
0
 def get_list_with_page(self, page=0):
     """Return the post ids found on one page of the listing.

     The listing URL is ``self.LIST_URL`` formatted with the configured
     tags and an offset of ``page * self.IMAGE_PER_PAGE``. If the page
     matches ``self.REGEX_AD`` the request is retried after 5 seconds.

     :param page: zero-based page number (shown to the user as page + 1).
     :return: list of ``re.findall`` matches for ``self.REGEX_POST_ID``;
         an empty list when the server cannot be reached.
     """
     url = self.LIST_URL % {"page_index": page * self.IMAGE_PER_PAGE, "tags": self.tags}
     req = urllib2.Request(url)
     try:
         result_page = get_url_opener(self.ui).open(req).read()
     except urllib2.URLError:
         self.ui.updateError("Cannot connect to server. Maybe bad internet connection?")
         return []

     if re.findall(self.REGEX_AD, result_page):
         self.ui.updateStatus("Advertisement found, retry after 5 sec...")
         gevent.sleep(5)
         # Bound-method call: passing ``self`` again would raise TypeError.
         return self.get_list_with_page(page)

     partial_list = re.findall(self.REGEX_POST_ID, result_page)
     self.ui.updateStatus("Found %d images on page %d" % (len(partial_list), page + 1))
     return partial_list
Exemple #5
0
    def get_image(self, target, total_count):
        """Download one image into ``self.fullpath`` and report progress.

        The file name is the last ``/``-separated component of the image
        URL. If that file already exists and the UI's overwrite option is
        not checked, the download is skipped but still counted.

        The response is read into memory in 16 KiB chunks (adding to
        ``self.total_rx_bytes``) and only written to disk once the whole
        body has been received. An HTTP 503 re-schedules the same target on
        ``self.pool``; every other error is reported through
        ``self.ui.updateError``.

        :param target: dict with the keys ``"referer"`` and ``"image_url"``.
        :param total_count: total number of images, used for the percentage
            in progress messages.
        """
        image_referer = target["referer"]
        image_url = target["image_url"]
        fname = image_url.split("/")[-1]
        dest_path = os.path.join(self.fullpath, fname)

        if os.path.exists(dest_path) \
                and (not self.ui.overwriteFile.IsChecked()):
            # We don't have to download the existing file again
            # if user does not want to.
            self.downloaded += 1
            self.ui.updateStatus("Progress %s/%s (%.2f %%) - SKIP! (Already downloaded)" % (
                self.downloaded, total_count,
                self.downloaded * 100.0 / total_count))
            return

        req = urllib.request.Request(image_url)
        req.add_header("referer", image_referer)
        try:
            response = get_url_opener(self.ui).open(req)
            img_file_buffer = BytesIO()
            while True:
                chunk = response.read(16384)
                if not chunk:
                    break
                img_file_buffer.write(chunk)
                self.total_rx_bytes += len(chunk)

            # The context manager closes the file even if the write fails.
            with open(dest_path, "wb") as fp:
                fp.write(img_file_buffer.getvalue())
            self.downloaded += 1
            self.ui.updateStatus("Progress: %s/%s (%.2f %%)" % (
                self.downloaded, total_count,
                self.downloaded * 100.0 / total_count)
            )
        except urllib.error.HTTPError as ue:
            if ue.code == 503:
                # Temporarily Unavailable Error: Retry!
                # Re-submit the whole target dict; get_image indexes it by
                # "referer"/"image_url", so passing the bare URL string
                # would make the retried call fail.
                self.pool.spawn(self.get_image, target, total_count)
            else:
                self.ui.updateError("Error: %s" % ue)
        except Exception as e:
            self.ui.updateError("Error: %s, %s" % (e, image_referer))
Exemple #6
0
 def get_original_url(self, image_id):
     """Look up a post's original image URL and add it to the targets.

     The post page (``self.POST_URL`` formatted with ``image_id``) is
     downloaded; the image URL is the first match of
     ``REGEX_RESIZE_ORIGINAL_URL`` or, when that finds nothing,
     ``REGEX_ORIGINAL_URL``. It is appended to ``self.target_list``
     together with the post URL as referer.

     A page matching ``self.REGEX_AD`` triggers a retry after 5 seconds.
     ``URLError`` and a missing image URL are reported via
     ``self.ui.updateError`` instead of being raised.

     :param image_id: value substituted for ``image_id`` in ``POST_URL``.
     """
     url = self.POST_URL % {"image_id": image_id}
     req = urllib2.Request(url)
     try:
         result_page = get_url_opener(self.ui).open(req).read()
     except urllib2.URLError as e:
         self.ui.updateError("Error while fetching original image URL from %s: %s" % (url, e))
         return None

     if re.findall(self.REGEX_AD, result_page):
         self.ui.updateStatus("Advertisement found, retry after 5 sec...")
         gevent.sleep(5)
         # Call through the bound method; passing ``self`` again would
         # raise a TypeError on every retry.
         return self.get_original_url(image_id)

     original_url = re.findall(self.REGEX_RESIZE_ORIGINAL_URL, result_page)
     if not original_url:
         original_url = re.findall(self.REGEX_ORIGINAL_URL, result_page)
     if not original_url:
         self.ui.updateError("Error: Cannot find original image URL of %s" % url)
         return None

     self.target_list.append({"referer": url, "image_url": original_url[0]})
     return None
Exemple #7
0
 def get_list_with_page(self, page=0):
     """Fetch one listing page and return the post ids it contains.

     The URL is ``self.LIST_URL`` filled in with ``self.tags`` and a
     ``page_index`` of ``page * self.IMAGE_PER_PAGE``. A page matching
     ``self.REGEX_AD`` is retried after a 5 second pause.

     :param page: zero-based page number (shown to the user as page + 1).
     :return: list of ``re.findall`` matches for ``self.REGEX_POST_ID``;
         an empty list when the server cannot be reached.
     """
     url = self.LIST_URL % {
         "page_index": page * self.IMAGE_PER_PAGE,
         "tags": self.tags
     }
     req = urllib2.Request(url)
     try:
         result_page = get_url_opener(self.ui).open(req).read()
     except urllib2.URLError:
         self.ui.updateError(
             "Cannot connect to server. Maybe bad internet connection?")
         return []

     if re.findall(self.REGEX_AD, result_page):
         self.ui.updateStatus(
             "Advertisement found, retry after 5 sec...")
         gevent.sleep(5)
         # Bound-method call: passing ``self`` again would raise TypeError.
         return self.get_list_with_page(page)

     partial_list = re.findall(self.REGEX_POST_ID, result_page)
     self.ui.updateStatus("Found %d images on page %d" %
                          (len(partial_list), page + 1))
     return partial_list