コード例 #1
0
def spider(url_list):
    """Run a single-worker crawl over ``url_list`` with debug enabled.

    ``url_list`` is handed unchanged to ``Rolling`` together with a
    ``Fetch('/tmp')`` instance, and the result is executed by ``GSpider``.
    (``'/tmp'`` is presumably the fetcher's cache directory -- confirm
    against ``Fetch``.)
    """
    page_fetcher = Fetch('/tmp')
    crawl = Rolling(page_fetcher, url_list)

    # Debug mode is hard-wired on for this entry point.
    GSpider(crawl, workers_count=1, debug=True).start()
コード例 #2
0
def main():
    """Crawl whatever ``Dongxi().daily_dongxi()`` yields using three workers.

    A ``NoCacheFetch`` is built with a first argument of ``0`` and an empty
    header dict; it is combined with the daily items in a ``Rolling`` and
    run by ``GSpider``.
    """
    source = Dongxi()
    # No extra request headers are sent: the dict is intentionally empty.
    page_fetcher = NoCacheFetch(0, headers={})
    crawl = Rolling(page_fetcher, source.daily_dongxi())
    GSpider(crawl, workers_count=3).start()
コード例 #3
0
def spider(url_list):
    """Run a single-worker crawl over ``url_list`` with debug enabled.

    One ``{'Cookie': value}`` header dict is built per entry of the
    module-level ``COOKIE`` iterable; the resulting tuple is passed to
    ``Fetch`` along with ``'/tmp'``, ``3.33`` and ``zhihu_topic_title``.
    (The meaning of ``3.33`` is not visible here -- presumably a delay or
    interval; confirm against ``Fetch``.)
    """
    cookie_headers = tuple({'Cookie': cookie} for cookie in COOKIE)
    page_fetcher = Fetch('/tmp', cookie_headers, 3.33, zhihu_topic_title)
    crawl = Rolling(page_fetcher, url_list)

    # Debug mode is hard-wired on for this entry point.
    GSpider(crawl, workers_count=1, debug=True).start()
コード例 #4
0
def spider(url_list):
    """Run a two-worker crawl over ``url_list`` with debug enabled.

    A ``MultiHeadersFetch`` receives one ``{'Cookie': value}`` header dict
    per entry of the module-level ``COOKIE`` iterable.
    """
    cookie_headers = tuple({'Cookie': cookie} for cookie in COOKIE)
    page_fetcher = MultiHeadersFetch(headers=cookie_headers)
    crawl = Rolling(page_fetcher, url_list)

    # Debug mode is hard-wired on for this entry point.
    GSpider(crawl, workers_count=2, debug=True).start()
コード例 #5
0
ファイル: wm_user.py プロジェクト: xqk/42qu_github_mirror
def spider(url_list):
    """Run a single-worker crawl over ``url_list`` with debug enabled.

    The fetcher is a ``NoCacheFetch`` called with ``0`` and a single header
    dict carrying the module-level ``COOKIE`` value under ``"Cookie"``.
    """
    request_headers = {"Cookie": COOKIE}
    page_fetcher = NoCacheFetch(0, request_headers)
    crawl = Rolling(page_fetcher, url_list)

    # Debug mode is hard-wired on for this entry point.
    GSpider(crawl, workers_count=1, debug=True).start()
コード例 #6
0
def spider(url_list):
    """Run a single-worker crawl over ``url_list`` with debug enabled.

    Each entry of the module-level ``COOKIE`` iterable has the literal text
    ``Cookie:`` removed and surrounding whitespace stripped before being
    wrapped in a ``{'Cookie': value}`` header dict.  The tuple of dicts is
    passed to ``Fetch`` with ``'/home/zuroc/tmp'``, ``25`` and
    ``zhihu_topic_title``.  (The meaning of ``25`` is not visible here --
    confirm against ``Fetch``.)
    """
    cookie_headers = tuple(
        {'Cookie': raw.replace('Cookie:','').strip()} for raw in COOKIE
    )
    page_fetcher = Fetch(
        '/home/zuroc/tmp', cookie_headers, 25, zhihu_topic_title
    )
    crawl = Rolling(page_fetcher, url_list)

    # Debug mode is hard-wired on for this entry point.
    GSpider(crawl, workers_count=1, debug=True).start()
コード例 #7
0
def main():
    """Crawl the events widget page of every entry in ``DOUBAN_SITE_LIST``.

    Each ``(url, user_id, zsite_id)`` entry becomes a pair of
    ``ParseEventIndex(user_id, zsite_id)`` and the corresponding
    ``site.douban.com`` events URL.  The pairs are crawled by one worker
    through a ``NoCacheFetch`` that sends a fixed ``Cookie`` header.
    """
    targets = [
        (ParseEventIndex(user_id, zsite_id),
         'http://site.douban.com/widget/events/%s/' % url)
        for url, user_id, zsite_id in DOUBAN_SITE_LIST
    ]

    page_fetcher = NoCacheFetch(10, headers={'Cookie': 'bid=i9gsK/lU40A'})
    crawl = Rolling(page_fetcher, targets)
    GSpider(crawl, workers_count=1).start()
コード例 #8
0
ファイル: daily.py プロジェクト: xqk/42qu_github_mirror
def main():
    """Crawl everything produced by ``pagelister()`` using ten workers.

    Requests go through a ``NoCacheFetch`` (first argument ``0``) carrying a
    fixed set of browser-like request headers.
    """
    # Header values are reproduced exactly as before, including the leading
    # space in the Accept value.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1.7) Gecko/20091221 Firefox/3.5.7',
        'Accept': ' text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
        'Accept-Language': 'zh-cn,zh;q=0.5',
        'Accept-Charset': 'gb18030,utf-8;q=0.7,*;q=0.7',
        'Content-type': 'application/x-www-form-urlencoded',
    }
    page_fetcher = NoCacheFetch(0, headers=browser_headers)
    crawl = Rolling(page_fetcher, pagelister())
    GSpider(crawl, workers_count=10).start()
コード例 #9
0
def spider(url_list):
    """Crawl ``url_list`` and then dump ``QUESTION_ID_SET`` to a file.

    The crawl uses one debug-enabled worker and a ``Fetch`` built from
    ``'/tmp'``, one ``{'Cookie': value}`` dict per entry of the module-level
    ``COOKIE`` iterable, ``2.6`` and ``zhihu_topic_title``.

    Once ``GSpider.start()`` returns, the module-level ``QUESTION_ID_SET`` is
    rebound to a tuple of its contents and written to ``question_id.py`` in
    the current working directory as a ``QUESTION_ID_SET = ...`` assignment.
    """
    global QUESTION_ID_SET

    cookie_headers = tuple({'Cookie': cookie} for cookie in COOKIE)
    page_fetcher = Fetch('/tmp', cookie_headers, 2.6, zhihu_topic_title)
    crawl = Rolling(page_fetcher, url_list)

    # Debug mode is hard-wired on for this entry point.
    GSpider(crawl, workers_count=1, debug=True).start()

    # Note: this rebinds the module global to a tuple, not just a local copy.
    QUESTION_ID_SET = tuple(QUESTION_ID_SET)
    with open("question_id.py", "w") as out:
        out.write("QUESTION_ID_SET = ")
        out.write(pformat(QUESTION_ID_SET))