def TestCnWeiboFetcher(user, pwd, uid=10029, page=1):
    """Smoke-test ``CnWeiboFetcher`` by saving four weibo.cn pages to disk.

    Checks the login cookie and the existence of ``uid`` (printing both
    results), then fetches the weibo, follow, fan and info pages of that
    user and writes each response to ``./file/<kind>-<uid>.txt`` as UTF-8.
    Finally prints the fetcher's connection count and the elapsed seconds.

    Args:
        user: Account name handed to ``CnWeiboFetcher``.
        pwd: Password handed to ``CnWeiboFetcher``.
        uid: User id to fetch. Defaults to 10029; other ids previously
            used for manual runs: 10057, 10111, 10145, 10211, 10318,
            10361, 10392.
        page: Page number requested from the weibo/follow/fans
            endpoints (the info URL carries no page parameter).
    """
    # (name used in the progress message, output file prefix, URL)
    targets = [
        ('weibo', 'weibos',
         'http://weibo.cn/%s?page=%s' % (uid, page)),
        ('follows', 'follows',
         'http://weibo.cn/%s/follow?page=%s' % (uid, page)),
        ('fans', 'fans',
         'http://weibo.cn/%s/fans?page=%s' % (uid, page)),
        ('infos', 'infos',
         'http://weibo.cn/%s/info' % (uid,)),
    ]

    start = time.time()

    fetcher = CnWeiboFetcher(username=user, password=pwd)

    print('test for check cookie weibo.com')
    print(fetcher.check_cookie())

    print('test for check user exist...')
    print(fetcher.check_user(uid))

    for label, prefix, url in targets:
        print('test for fetch %s...' % label)
        html = fetcher.fetch(url)
        out_path = './file/%s-%s.txt' % (prefix, uid)
        with codecs.open(out_path, 'w', 'utf-8') as out:
            out.write(html)

    cost_time = int(time.time() - start)

    print('finished: # connections: %s, cost time: %s' % (
        fetcher.n_connections, cost_time))
def TestCnWeiboFetcher(user, pwd, uid=10029, page=1):
    """Smoke-test ``CnWeiboFetcher`` by saving four weibo.cn pages to disk.

    Prints the result of the cookie check and of the user-exists check,
    then downloads the weibo, follow, fan and info pages for ``uid`` and
    stores each response under ``./file/<kind>-<uid>.txt`` (UTF-8).
    A summary line with the fetcher's connection count and the elapsed
    whole seconds is printed at the end.

    Args:
        user: Account name handed to ``CnWeiboFetcher``.
        pwd: Password handed to ``CnWeiboFetcher``.
        uid: User id to fetch. Defaults to 10029; other ids previously
            used for manual runs: 10057, 10111, 10145, 10211, 10318,
            10361, 10392.
        page: Page number requested from the weibo/follow/fans
            endpoints (the info URL carries no page parameter).
    """
    # (name used in the progress message, output file prefix, URL)
    targets = [
        ("weibo", "weibos", "http://weibo.cn/%s?page=%s" % (uid, page)),
        ("follows", "follows", "http://weibo.cn/%s/follow?page=%s" % (uid, page)),
        ("fans", "fans", "http://weibo.cn/%s/fans?page=%s" % (uid, page)),
        ("infos", "infos", "http://weibo.cn/%s/info" % (uid,)),
    ]

    start = time.time()

    fetcher = CnWeiboFetcher(username=user, password=pwd)

    print("test for check cookie weibo.com")
    print(fetcher.check_cookie())

    print("test for check user exist...")
    print(fetcher.check_user(uid))

    for label, prefix, url in targets:
        print("test for fetch %s..." % label)
        html = fetcher.fetch(url)
        out_path = "./file/%s-%s.txt" % (prefix, uid)
        with codecs.open(out_path, "w", "utf-8") as out:
            out.write(html)

    cost_time = int(time.time() - start)

    print("finished: # connections: %s, cost time: %s" % (fetcher.n_connections, cost_time))
def TestCnWeiboCrawler(user, pwd, uid=10029, store_path='./file/'):
    """Run the follows and fans crawls of ``CnWeiboCrawler`` for one user.

    Logs in through ``CnWeiboFetcher``; if the cookie check fails the
    process exits with status 1. Otherwise a crawler is created for each
    pass (follows, then fans), and a summary line is printed at the end.

    Args:
        user: Account name handed to ``CnWeiboFetcher``.
        pwd: Password handed to ``CnWeiboFetcher``.
        uid: User id to crawl. Defaults to 10029; other ids previously
            used for manual runs: 10057, 10111, 10145, 10211, 10318,
            10361, 10392.
        store_path: Directory argument passed to ``CnWeiboCrawler``.

    Raises:
        SystemExit: With status 1 when the login check fails.
    """
    fetcher = CnWeiboFetcher(username=user, password=pwd)

    start = time.time()

    if not fetcher.check_cookie():
        print('login failed.')
        # A bare sys.exit() would report success (status 0) to the caller.
        sys.exit(1)

    # The connection counter is zeroed before each pass and a fresh
    # crawler is built for it.
    fetcher.n_connections = 0
    print('crawl follows')
    follows_crawler = CnWeiboCrawler(fetcher, store_path, uid=uid)
    follows_crawler.crawl_follows()

    fetcher.n_connections = 0
    print('crawl fans')
    fans_crawler = CnWeiboCrawler(fetcher, store_path, uid=uid)
    fans_crawler.crawl_fans()

    cost_time = int(time.time() - start)

    # NOTE: the counter was reset right before the fans crawl, so the
    # figure printed here excludes the follows crawl, whereas the elapsed
    # time covers both passes.
    print('finished: # connections: %s, cost time: %s' % (
        fetcher.n_connections, cost_time))
def TestCnWeiboCrawler(user, pwd, uid=10029, store_path='./file/'):
    """Crawl the follows and then the fans of one user via ``CnWeiboCrawler``.

    A ``CnWeiboFetcher`` is created and its cookie check is used as the
    login test; on failure the process exits with status 1. Each crawl
    gets its own crawler instance, and a summary line is printed last.

    Args:
        user: Account name handed to ``CnWeiboFetcher``.
        pwd: Password handed to ``CnWeiboFetcher``.
        uid: User id to crawl. Defaults to 10029; other ids previously
            used for manual runs: 10057, 10111, 10145, 10211, 10318,
            10361, 10392.
        store_path: Directory argument passed to ``CnWeiboCrawler``.

    Raises:
        SystemExit: With status 1 when the login check fails.
    """
    fetcher = CnWeiboFetcher(username=user, password=pwd)

    start = time.time()

    login_ok = fetcher.check_cookie()
    if not login_ok:
        print('login failed.')
        # Signal the failure through the exit status instead of exiting 0.
        sys.exit(1)

    # Counter is zeroed before every pass; each pass uses a new crawler.
    fetcher.n_connections = 0
    print('crawl follows')
    CnWeiboCrawler(fetcher, store_path, uid=uid).crawl_follows()

    fetcher.n_connections = 0
    print('crawl fans')
    CnWeiboCrawler(fetcher, store_path, uid=uid).crawl_fans()

    cost_time = int(time.time() - start)

    # NOTE: because of the reset above, the connection figure excludes
    # the follows crawl while the elapsed time spans both crawls.
    print('finished: # connections: %s, cost time: %s' % (
        fetcher.n_connections, cost_time))
Example #5
0
    def on_login(self, event):
        """Validate the login form and, on success, hand over to the crawler UI.

        Reads the selected website and the account/password fields.
        Blank credentials, Twitter and Facebook each produce a message
        box. For the two Weibo sites a fetcher is created with this
        window attached; when its cookie check succeeds, this window is
        destroyed, the wx main loop is exited and
        ``win_local_crawler.main`` is started with the fetcher. When the
        cookie check fails nothing further is done here.

        Args:
            event: The wx event that triggered this handler (unused).
        """
        website = SITE_CHOICES[self.website.GetCurrentSelection()]
        account = self.account.GetValue().strip().encode('UTF-8')
        password = self.password.GetValue().strip().encode('UTF-8')

        # encode() always yields a string, so emptiness is the only case
        # that needs checking (a None test here could never be true).
        if not account or not password:
            wx.MessageBox(
                message='Account/Password cannot be blank. Please retry!',
                caption='Warning',
                style=wx.OK | wx.ICON_INFORMATION)
            return

        if website == settings.COMWEIBO:
            fetcher_cls = ComWeiboFetcher
        elif website == settings.CNWEIBO:
            fetcher_cls = CnWeiboFetcher
        elif website == settings.TWITTER:
            wx.MessageBox(
                message='For Twitter: not implemented. Please retry!',
                caption='Error',
                style=wx.OK | wx.ICON_INFORMATION)
            return
        elif website == settings.FACEBOOK:
            wx.MessageBox(
                message='For Facebook: not implemented. Please retry!',
                caption='Error',
                style=wx.OK | wx.ICON_INFORMATION)
            return
        else:
            # Any other selection is ignored.
            return

        fetcher = fetcher_cls(username=account,
                              password=password,
                              window=self)
        if not fetcher.check_cookie():
            return

        # Detach this window from the fetcher before destroying it --
        # presumably so the fetcher does not keep using a dead window.
        fetcher.window = None

        self.Destroy()
        wx.GetApp().ExitMainLoop()

        # Imported only once login has succeeded, as in the original code.
        import win_local_crawler as wlc
        wlc.main(account_display=account,
                 website_display=website,
                 fetcher=fetcher)
def TestWeibo__init__(user, pwd, weibo_com):
    """Drive ``sina_weibo.main`` once for every supported ``fetch_data`` kind.

    Builds a ``ComWeiboFetcher`` when ``weibo_com`` is truthy and a
    ``CnWeiboFetcher`` otherwise, and exits with status 1 if the cookie
    check fails. Then crawls weibos, follows, fans and infos for a fixed
    list of user ids, followed by reposts and comments for a fixed list
    of message URLs, storing everything under ``./file/``. A summary line
    with the fetcher's connection count and elapsed seconds is printed.

    Args:
        user: Account name handed to the fetcher.
        pwd: Password handed to the fetcher.
        weibo_com: Selects the fetcher class and is forwarded to
            ``sina_weibo.main``.

    Raises:
        SystemExit: With status 1 when the login check fails.
    """
    fetcher_cls = ComWeiboFetcher if weibo_com else CnWeiboFetcher
    fetcher = fetcher_cls(username=user, password=pwd)

    if not fetcher.check_cookie():
        print('login failed.')
        # A bare sys.exit() would report success (status 0) to the caller.
        sys.exit(1)

    store_path = './file/'
    uids = [1000000253, 10057, 10029]
    msg_urls = [
        'http://weibo.com/1000000253/ezC36cq3i6G',
        'http://weibo.com/1713926427/A2V5CENGU'
    ]

    start = time.time()

    # Per-user crawls: the progress label equals the fetch_data value.
    for kind in ('weibos', 'follows', 'fans', 'infos'):
        print('crawl %s' % kind)
        sina_weibo.main(fetcher,
                        fetch_data=kind,
                        store_path=store_path,
                        uids=uids,
                        weibo_com=weibo_com)

    # Per-message crawls: the label is plural, the fetch_data value is not.
    for label, kind in (('reposts', 'repost'), ('comments', 'comment')):
        print('crawl %s' % label)
        sina_weibo.main(fetcher,
                        store_path=store_path,
                        msg_urls=msg_urls,
                        fetch_data=kind,
                        weibo_com=weibo_com)

    cost_time = int(time.time() - start)
    print('finished: # connections: %s, cost time: %s' % (
        fetcher.n_connections, cost_time))