def process_page(query_result):
    print "processing page %s" % query_result.href
    html_text = utils.download_page(query_result.href, timeout=2)
    dates = extract_dates(html_text)
    # Keep only dates that are not already in the past.
    dates = [d for d in dates if d.dateRange.start >= date.today()]
    if not dates:
        return []
    print "found dates", dates

    # Pick the date whose position in the page is closest to an
    # occurrence of the query title.
    rex = re.compile(query_result.title, re.I | re.UNICODE | re.MULTILINE)
    candidate_date = dates[0]
    min_dist = 10000000
    i = 0
    while i < len(html_text):
        match = rex.search(html_text, i)
        if match:
            s = match.start()
            for d in dates:
                if abs(d.startPos - s) < min_dist:
                    min_dist = abs(d.startPos - s)
                    candidate_date = d
            i = match.end() + 1
        else:
            break
    print "candidate", candidate_date
    return [Event(query_result.title,
                  candidate_date.dateRange.start,
                  candidate_date.dateRange.end)]
def _get_total_posts(self):
    url = self.base_url + "0&num=1"
    data = utils.download_page(url)
    if data:
        self.total_posts = int(self.total_post_re.findall(data)[0])
    if self.max_posts:
        self.total_posts = min(self.total_posts, self.max_posts)
    limit_start = self.limit_start
    while limit_start < self.total_posts:
        self.post_queue.put(limit_start)
        limit_start += self.num
def _get_img_urls(self):
    while not self.post_queue.empty():
        limit_start = self.post_queue.get()
        url = self.base_url + str(limit_start) + "&num=" + str(self.num)
        data = utils.download_page(url, proxies=self.proxies)
        if data:
            imgs = self.img_re.findall(data)
            for img in imgs:
                img = img.replace('\\', '')
                if not self.need_save:
                    self.imglog.info("%s" % img)
                else:
                    self.img_queue.put(img)
def get_single_chapter(name, chapter, url):
    folder = os.path.join(name.replace(' ', '_'), "ch{}".format(chapter))
    utils.mkdir_p(folder)
    for page, img in _get_pages(url):
        utils.download_page(folder, page, img)
    print 'making cbz for', folder
    # Body of the date-extraction routine (extract_dates). It assumes the
    # function defined `rexs` and `formats` (parallel lists of compiled
    # patterns and their date formats) and `dates` (the result accumulator)
    # earlier; those definitions are not part of this fragment.
    for (rex, format) in zip(rexs, formats):
        start_pos = 0
        n = len(string)
        # print format
        while 0 <= start_pos < n:
            match = rex.search(string, start_pos)
            if match:
                # print "\t" + match.group()
                try:
                    dates.append(DateInfo(match.group(1), format,
                                          match.start(1), match.end(1)))
                except Exception:
                    # Skip matches that can't be turned into a DateInfo.
                    pass
                start_pos = match.end() + 1
            else:
                break
    # Drop obviously bogus parses far in the future.
    dates = [d for d in dates if d.dateRange.start <= date(3000, 1, 1)]
    return dates


# for testing
from utils import download_page

if __name__ == "__main__":
    pp = pprint.PrettyPrinter(indent=4)
    page_text = download_page("http://starforce.eu/")
    date_strs = extract_dates(page_text)
    for date_str in date_strs:
        print date_str.startPos, date_str.endPos