예제 #1
0
def TestCnWeiboFetcher(user, pwd):
    """Exercise CnWeiboFetcher by hand against one hard-coded account.

    Creates a fetcher from the given credentials, prints the results of
    ``check_cookie()`` and ``check_user(uid)``, then fetches the weibo,
    follow, fans and info pages of the account from weibo.cn and writes
    each response to its own UTF-8 text file under ``./file/``. At the
    end it prints the fetcher's ``n_connections`` attribute together
    with the elapsed time truncated to whole seconds.

    Args:
        user: forwarded to CnWeiboFetcher as ``username``.
        pwd: forwarded to CnWeiboFetcher as ``password``.
    """
    account_id = 10029
    # Alternative account ids to switch to by hand:
    # 10057, 10111, 10145, 10211, 10318, 10361, 10392
    page_no = 1

    weibos_path = './file/weibos-%s.txt' % account_id
    follows_path = './file/follows-%s.txt' % account_id
    fans_path = './file/fans-%s.txt' % account_id
    infos_path = './file/infos-%s.txt' % account_id

    # The timer starts before the fetcher is constructed, so the reported
    # cost covers construction and both checks as well as the fetches.
    began = time.time()

    fetcher = CnWeiboFetcher(username=user, password=pwd)

    print('test for check cookie weibo.com')
    print(fetcher.check_cookie())

    print('test for check user exist...')
    print(fetcher.check_user(account_id))

    # One entry per page kind: (progress message, page URL, output file).
    # Entries are processed strictly in the order listed here.
    jobs = (
        ('test for fetch weibo...',
         'http://weibo.cn/%s?page=%s' % (account_id, page_no),
         weibos_path),
        ('test for fetch follows...',
         'http://weibo.cn/%s/follow?page=%s' % (account_id, page_no),
         follows_path),
        ('test for fetch fans...',
         'http://weibo.cn/%s/fans?page=%s' % (account_id, page_no),
         fans_path),
        ('test for fetch infos...',
         'http://weibo.cn/%s/info' % account_id,
         infos_path),
    )
    for message, page_url, out_path in jobs:
        print(message)
        body = fetcher.fetch(page_url)
        with codecs.open(out_path, 'w', 'utf-8') as out:
            out.write(body)

    elapsed = int(time.time() - began)

    summary = 'finished: # connections: %s, cost time: %s'
    print(summary % (fetcher.n_connections, elapsed))
예제 #2
0
def TestCnWeiboFetcher(user, pwd):
    """Run a manual end-to-end check of CnWeiboFetcher.

    Logs the outcome of ``check_cookie()`` and ``check_user(uid)`` for a
    fixed account id, downloads that account's weibo, follow, fans and
    info pages from weibo.cn, and stores every response as UTF-8 text
    in ``./file/``. The last line printed reports the fetcher's
    ``n_connections`` attribute and the elapsed time in whole seconds.

    Args:
        user: passed to CnWeiboFetcher as ``username``.
        pwd: passed to CnWeiboFetcher as ``password``.
    """
    target = 10029
    # Other account ids that can be swapped in manually:
    # 10057, 10111, 10145, 10211, 10318, 10361, 10392
    page_index = 1

    weibos_file = "./file/weibos-%s.txt" % target
    follows_file = "./file/follows-%s.txt" % target
    fans_file = "./file/fans-%s.txt" % target
    infos_file = "./file/infos-%s.txt" % target

    t_start = time.time()

    fetcher = CnWeiboFetcher(username=user, password=pwd)

    def fetch_to_file(banner, url, path):
        # Announce the step, download the page, then overwrite the file.
        print(banner)
        content = fetcher.fetch(url)
        with codecs.open(path, "w", "utf-8") as handle:
            handle.write(content)

    print("test for check cookie weibo.com")
    print(fetcher.check_cookie())

    print("test for check user exist...")
    print(fetcher.check_user(target))

    fetch_to_file(
        "test for fetch weibo...",
        "http://weibo.cn/%s?page=%s" % (target, page_index),
        weibos_file,
    )
    fetch_to_file(
        "test for fetch follows...",
        "http://weibo.cn/%s/follow?page=%s" % (target, page_index),
        follows_file,
    )
    fetch_to_file(
        "test for fetch fans...",
        "http://weibo.cn/%s/fans?page=%s" % (target, page_index),
        fans_file,
    )
    fetch_to_file(
        "test for fetch infos...",
        "http://weibo.cn/%s/info" % target,
        infos_file,
    )

    seconds = int(time.time() - t_start)

    print("finished: # connections: %s, cost time: %s"
          % (fetcher.n_connections, seconds))
예제 #3
0
def TestCnWeiboCrawler(user, pwd):
    """Crawl the follows and then the fans of one hard-coded account.

    Builds a CnWeiboFetcher from the credentials and stops the process
    via ``sys.exit()`` when ``check_cookie()`` returns a falsy value.
    Otherwise it runs ``crawl_follows()`` and ``crawl_fans()``, each on
    a freshly constructed CnWeiboCrawler writing to ``./file/``, and
    finally prints ``fetcher.n_connections`` and the elapsed whole
    seconds.

    Args:
        user: passed to CnWeiboFetcher as ``username``.
        pwd: passed to CnWeiboFetcher as ``password``.
    """
    account_id = 10029
    # Alternative account ids to switch to by hand:
    # 10057, 10111, 10145, 10211, 10318, 10361, 10392
    out_dir = './file/'

    fetcher = CnWeiboFetcher(username=user, password=pwd)

    # Timing begins after construction but before the cookie check.
    began = time.time()

    if not fetcher.check_cookie():
        print('login failed.')
        sys.exit()  # no explicit exit status is supplied

    # (progress message, crawler method to invoke), run in this order.
    stages = (
        ('crawl follows', 'crawl_follows'),
        ('crawl fans', 'crawl_fans'),
    )
    for message, method_name in stages:
        # The counter is zeroed before every stage, so the value printed
        # at the end starts from the reset made before the last stage.
        fetcher.n_connections = 0
        print(message)
        crawler = CnWeiboCrawler(fetcher, out_dir, uid=account_id)
        getattr(crawler, method_name)()

    elapsed = int(time.time() - began)

    summary = 'finished: # connections: %s, cost time: %s'
    print(summary % (fetcher.n_connections, elapsed))
def TestCnWeiboCrawler(user, pwd):
    """Manually drive CnWeiboCrawler for a fixed account id.

    After creating a CnWeiboFetcher, the function checks the login with
    ``check_cookie()`` and calls ``sys.exit()`` if that check is falsy.
    It then crawls follows and fans in turn, using a new CnWeiboCrawler
    for each and ``./file/`` as the store path, and prints the
    fetcher's ``n_connections`` plus the elapsed time in whole seconds.

    Args:
        user: forwarded to CnWeiboFetcher as ``username``.
        pwd: forwarded to CnWeiboFetcher as ``password``.
    """
    target = 10029
    # Other account ids that can be swapped in manually:
    # 10057, 10111, 10145, 10211, 10318, 10361, 10392
    destination = './file/'

    fetcher = CnWeiboFetcher(username=user, password=pwd)

    t_start = time.time()

    def run_stage(banner, crawl):
        # Zero the connection counter, announce the stage, then hand a
        # newly built crawler to the supplied action.
        fetcher.n_connections = 0
        print(banner)
        crawl(CnWeiboCrawler(fetcher, destination, uid=target))

    logged_in = fetcher.check_cookie()
    if not logged_in:
        print('login failed.')
        sys.exit()  # no explicit exit status is supplied

    run_stage('crawl follows', lambda crawler: crawler.crawl_follows())
    run_stage('crawl fans', lambda crawler: crawler.crawl_fans())

    seconds = int(time.time() - t_start)

    print('finished: # connections: %s, cost time: %s'
          % (fetcher.n_connections, seconds))