import re

# Download is the download helper defined elsewhere in the source; in this
# listing it returns the raw bytes of the requested page (a sketch of such a
# helper follows).


def crawl_sitemap(url):
    # download the sitemap file as sitemap.xml
    sitemap = Download(url)
    # extract the sitemap links from the <loc> tags
    links = re.findall('<loc>(.*?)</loc>', sitemap.decode("utf-8"))
    # download each link
    for link in links:
        html = Download(link)
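The sitemap crawler above, and the link crawlers later in this section, call Download directly as a function, but the helper itself is not shown here. As a rough idea of what it does, here is a minimal sketch assuming a plain urllib fetch with a custom User-Agent and basic error handling; the 'wswp' user-agent string is illustrative, and the real helper in the source may add retries, caching or proxy support.

import urllib.error
import urllib.request


def Download(url, user_agent='wswp'):
    """Minimal sketch of a download helper: fetch a URL and return its bytes,
    or None if the request fails."""
    print('Downloading:', url)
    request = urllib.request.Request(url, headers={'User-Agent': user_agent})
    try:
        html = urllib.request.urlopen(request).read()
    except urllib.error.URLError as e:
        print('Download error:', e.reason)
        html = None
    return html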
import datetime

# URLManager, Download, HtmlParser and DataOutput are the project's own
# helper classes, imported from its other modules.


class SpiderMain(object):
    def __init__(self):
        self.manager = URLManager()
        self.down = Download()
        self.parser = HtmlParser()
        self.output = DataOutput()

    def crawl(self, root_url):
        content = self.down.download(root_url)
        movie_ids = self.parser.parse_urls(content)
        count = 0
        for mid in movie_ids:
            if count > 10:
                break
            # build the Mtime rating API URL for this movie ID
            movie_link = ('http://service.library.mtime.com/Movie.api'
                          '?Ajax_CallBack=true'
                          '&Ajax_CallBackType=Mtime.Library.Services'
                          '&Ajax_CallBackMethod=GetMovieOverviewRating'
                          '&Ajax_CrossDomain=1'
                          '&Ajax_RequestUrl=http%3A%2F%2Fmovie.mtime.com%2F{0}%2F'
                          '&t={1}'
                          '&Ajax_CallBackArgument0={2}').format(
                              mid,
                              datetime.datetime.now().strftime("%Y%m%d%H%M%S%f"),
                              mid)
            res = self.down.download(movie_link)
            self.parser.parser_json(res)
            count += 1
        self.output.store_data(self.parser.items)
        self.output.close_connect()
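A driver for the spider is not shown here. A minimal one, assuming the Beijing theater page used in the test snippet at the end of this section as the entry URL, could look like this:

if __name__ == '__main__':
    spider = SpiderMain()
    # seed with the Mtime Beijing theater listing page
    spider.crawl('http://theater.mtime.com/China_Beijing/')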
def crawl_sitemap(url):
    # download the sitemap file
    sitemap = Download(url)
    #>Downloading: http://example.webscraping.com/sitemap.xml
    # extract the sitemap links
    links = re.findall('<loc>(.*?)</loc>', sitemap)
    # download each link
    for link in links:
        html = Download(link)
import re

# Download and get_links are helpers defined elsewhere in the source
# (a sketch of get_links follows this listing).


def link_crawler(seed_url, link_regex):
    """Crawl from the given seed URL following links matched by link_regex
    """
    crawl_queue = [seed_url]  # the queue of URLs to download
    while crawl_queue:
        url = crawl_queue.pop()
        html = Download(url)
        # filter for links matching our regular expression
        for link in get_links(html):
            if re.match(link_regex, link):
                # add this link to the crawl queue
                crawl_queue.append(link)
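The get_links helper used above is not shown in this listing. A minimal sketch, assuming it simply extracts every href attribute from an already-decoded page string with a regular expression, might be:

import re


def get_links(html):
    """Sketch of get_links: return a list of href values found in the page."""
    # match the value of every href attribute, single- or double-quoted
    webpage_regex = re.compile(r'<a[^>]+href=["\'](.*?)["\']', re.IGNORECASE)
    return webpage_regex.findall(html)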
import itertools


def iteration():
    for page in itertools.count(1):
        url = 'http://example.webscraping.com/view/-%d' % page
        # url = 'http://example.webscraping.com/view/-{}'.format(page)
        html = Download(url)
        if html is None:
            # received an error trying to download this webpage, so assume we
            # have reached the last country ID and can stop downloading
            break
        else:
            # success - can scrape the result
            # ...
            pass
def link_crawler(seed_url, link_regex):
    """Crawl from the given seed URL following links matched by link_regex
    """
    crawl_queue = [seed_url]
    # keep track of which URLs have been seen before
    seen = set(crawl_queue)
    while crawl_queue:
        url = crawl_queue.pop()
        html = Download(url)
        for link in get_links(html):
            # check if link matches expected regex
            if re.match(link_regex, link):
                # form absolute link
                link = urlparse.urljoin(seed_url, link)
                # only queue links that have not been seen before
                if link not in seen:
                    seen.add(link)
                    crawl_queue.append(link)
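With the deduplicating version in place, the crawler can be started with a seed URL and a pattern restricting which links to follow; the index/view pattern below is just an illustrative choice for the example site.

# follow only the country index and view pages of the example site
link_crawler('http://example.webscraping.com', '/(index|view)')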
import itertools


def iteration():
    """Exit once N consecutive downloads have failed."""
    max_errors = 5  # maximum number of consecutive download errors allowed
    num_errors = 0  # current number of consecutive download errors
    for page in itertools.count(1):
        url = 'http://example.webscraping.com/places/default/view/-{}'.format(
            page)
        html = Download(url)
        if html is None:
            # received an error trying to download this webpage
            num_errors += 1
            if num_errors == max_errors:
                # reached the maximum number of errors in a row, so assume we
                # have reached the last country ID and can stop downloading
                break
        else:
            # success - can scrape the result
            # ...
            num_errors = 0
from common import Download
from parsers import HtmlParser

dl = Download()
parse = HtmlParser()
content = dl.download('http://theater.mtime.com/China_Beijing/')
res = parse._parse_movies(content)
print(res)
def __init__(self):
    self.manager = URLManager()
    self.down = Download()
    self.parser = HtmlParser()
    self.output = DataOutput()