def TestCnWeiboFetcher(user, pwd):
    """Manually exercise CnWeiboFetcher against one hard-coded uid.

    Builds a CnWeiboFetcher with the given credentials, prints the results
    of ``check_cookie()`` and ``check_user(uid)``, then fetches the weibo,
    follow, fan and info pages of the uid and writes each response to a
    UTF-8 text file under ``./file/``. Finally prints the fetcher's
    ``n_connections`` and the elapsed wall-clock time in whole seconds.

    :param user: value passed to CnWeiboFetcher as ``username``.
    :param pwd: value passed to CnWeiboFetcher as ``password``.
    """
    # Alternative uids previously tried here:
    # 10057, 10111, 10145, 10211, 10318, 10361, 10392
    uid = 10029
    page = 1

    # One row per page to fetch:
    # (progress message, url to request, file receiving the response)
    targets = (
        ('test for fetch weibo...',
         'http://weibo.cn/%s?page=%s' % (uid, page),
         './file/weibos-%s.txt' % uid),
        ('test for fetch follows...',
         'http://weibo.cn/%s/follow?page=%s' % (uid, page),
         './file/follows-%s.txt' % uid),
        ('test for fetch fans...',
         'http://weibo.cn/%s/fans?page=%s' % (uid, page),
         './file/fans-%s.txt' % uid),
        ('test for fetch infos...',
         'http://weibo.cn/%s/info' % uid,
         './file/infos-%s.txt' % uid),
    )

    began = time.time()
    fetcher = CnWeiboFetcher(username=user, password=pwd)

    # print(...) with a single argument prints the same text under the
    # Python 2 print statement used elsewhere in this file.
    print('test for check cookie weibo.com')
    print(fetcher.check_cookie())

    print('test for check user exist...')
    print(fetcher.check_user(uid))

    for message, url, path in targets:
        print(message)
        content = fetcher.fetch(url)
        with codecs.open(path, 'w', 'utf-8') as out:
            out.write(content)

    elapsed = int(time.time() - began)
    print('finished: # connections: %s, cost time: %s' % (
        fetcher.n_connections, elapsed))
def TestCnWeiboFetcher(user, pwd):
    """Smoke-test CnWeiboFetcher by dumping four pages of a fixed uid.

    Prints the outcome of ``check_cookie()`` and ``check_user(uid)``, then
    fetches the weibo list, follow list, fan list and info page and stores
    each response as UTF-8 text under ``./file/``. Ends by printing the
    fetcher's ``n_connections`` and the elapsed time in whole seconds.

    :param user: value passed to CnWeiboFetcher as ``username``.
    :param pwd: value passed to CnWeiboFetcher as ``password``.
    """
    # Alternative uids previously tried here:
    # 10057, 10111, 10145, 10211, 10318, 10361, 10392
    uid = 10029
    page = 1

    weibos_path = "./file/weibos-%s.txt" % uid
    follows_path = "./file/follows-%s.txt" % uid
    fans_path = "./file/fans-%s.txt" % uid
    infos_path = "./file/infos-%s.txt" % uid

    t0 = time.time()
    fetcher = CnWeiboFetcher(username=user, password=pwd)

    def fetch_to_file(message, url, path):
        # Announce the step, fetch the page, then write the response out.
        # print(...) with one argument matches the Python 2 print statement.
        print(message)
        body = fetcher.fetch(url)
        with codecs.open(path, "w", "utf-8") as sink:
            sink.write(body)

    print("test for check cookie weibo.com")
    print(fetcher.check_cookie())

    print("test for check user exist...")
    print(fetcher.check_user(uid))

    fetch_to_file("test for fetch weibo...",
                  "http://weibo.cn/%s?page=%s" % (uid, page),
                  weibos_path)
    fetch_to_file("test for fetch follows...",
                  "http://weibo.cn/%s/follow?page=%s" % (uid, page),
                  follows_path)
    fetch_to_file("test for fetch fans...",
                  "http://weibo.cn/%s/fans?page=%s" % (uid, page),
                  fans_path)
    fetch_to_file("test for fetch infos...",
                  "http://weibo.cn/%s/info" % uid,
                  infos_path)

    seconds = int(time.time() - t0)
    print("finished: # connections: %s, cost time: %s"
          % (fetcher.n_connections, seconds))
def TestCnWeiboCrawler(user, pwd):
    """Manually run the follow and fan crawls of CnWeiboCrawler for one uid.

    Builds a CnWeiboFetcher, exits via ``sys.exit()`` after printing
    'login failed.' when ``check_cookie()`` is falsy, and otherwise runs
    ``crawl_follows()`` and then ``crawl_fans()``, each on a freshly built
    crawler storing into ``./file/``. The fetcher's ``n_connections`` is
    reset to 0 before each crawl, so the count printed at the end is
    whatever accumulated since the last reset.

    :param user: value passed to CnWeiboFetcher as ``username``.
    :param pwd: value passed to CnWeiboFetcher as ``password``.
    """
    # Alternative uids previously tried here:
    # 10057, 10111, 10145, 10211, 10318, 10361, 10392
    uid = 10029
    store_path = './file/'

    fetcher = CnWeiboFetcher(username=user, password=pwd)
    began = time.time()

    # print(...) with a single argument prints the same text under the
    # Python 2 print statement used elsewhere in this file.
    if not fetcher.check_cookie():
        print('login failed.')
        sys.exit()

    # (progress message, name of the crawler method to invoke)
    phases = (
        ('crawl follows', 'crawl_follows'),
        ('crawl fans', 'crawl_fans'),
    )
    for message, method_name in phases:
        fetcher.n_connections = 0
        print(message)
        crawler = CnWeiboCrawler(fetcher, store_path, uid=uid)
        getattr(crawler, method_name)()

    elapsed = int(time.time() - began)
    print('finished: # connections: %s, cost time: %s' % (
        fetcher.n_connections, elapsed))
def TestCnWeiboCrawler(user, pwd):
    """Drive CnWeiboCrawler's follow and fan crawls against a fixed uid.

    After constructing a CnWeiboFetcher the cookie is checked; on failure
    'login failed.' is printed and ``sys.exit()`` is called. Otherwise two
    steps run in order, follows then fans. Each step zeroes
    ``fetcher.n_connections``, prints a progress line, builds a new
    CnWeiboCrawler writing to ``./file/`` and invokes the crawl. The final
    line prints ``n_connections`` and the elapsed time in whole seconds.

    :param user: value passed to CnWeiboFetcher as ``username``.
    :param pwd: value passed to CnWeiboFetcher as ``password``.
    """
    # Alternative uids previously tried here:
    # 10057, 10111, 10145, 10211, 10318, 10361, 10392
    uid = 10029
    store_path = './file/'

    fetcher = CnWeiboFetcher(username=user, password=pwd)
    t0 = time.time()

    login_ok = fetcher.check_cookie()
    if not login_ok:
        # print(...) with one argument matches the Python 2 print statement.
        print('login failed.')
        sys.exit()

    def run_step(message, crawl):
        # Reset the counter, announce the step, crawl with a new crawler.
        fetcher.n_connections = 0
        print(message)
        crawl(CnWeiboCrawler(fetcher, store_path, uid=uid))

    run_step('crawl follows', lambda crawler: crawler.crawl_follows())
    run_step('crawl fans', lambda crawler: crawler.crawl_fans())

    seconds = int(time.time() - t0)
    print('finished: # connections: %s, cost time: %s'
          % (fetcher.n_connections, seconds))
def on_login(self, event):
    """Handle a login request from the form.

    Reads the selected website and the stripped, UTF-8 encoded account and
    password from the form controls. Blank credentials produce a warning
    box. For the two Weibo sites a matching fetcher is built with this
    window attached; if its ``check_cookie()`` succeeds the window is
    destroyed, the wx main loop is asked to exit and
    ``win_local_crawler.main`` is started with the fetcher. Twitter and
    Facebook only show a "not implemented" box. Any other selection does
    nothing.

    :param event: the wx event that triggered the handler (unused).
    """
    def show_box(text, title):
        # Every dialog in this handler uses the same OK/information style.
        wx.MessageBox(message=text, caption=title,
                      style=wx.OK | wx.ICON_INFORMATION)

    website = SITE_CHOICES[self.website.GetCurrentSelection()]
    account = self.account.GetValue().strip().encode('UTF-8')
    password = self.password.GetValue().strip().encode('UTF-8')

    # An encoded string is never None, so emptiness is the only blank case.
    if not account or not password:
        show_box('Account/Password cannot be blank. Please retry!', 'Warning')
        return

    if website == settings.COMWEIBO:
        fetcher = ComWeiboFetcher(username=account, password=password,
                                  window=self)
    elif website == settings.CNWEIBO:
        fetcher = CnWeiboFetcher(username=account, password=password,
                                 window=self)
    elif website == settings.TWITTER:
        show_box('For Twitter: not implemented. Please retry!', 'Error')
        return
    elif website == settings.FACEBOOK:
        show_box('For Facebook: not implemented. Please retry!', 'Error')
        return
    else:
        return

    if not fetcher.check_cookie():
        return

    # Detach the fetcher from this window before the window is destroyed.
    fetcher.window = None
    self.Destroy()
    wx.GetApp().ExitMainLoop()

    # Imported at this point, only once the login has succeeded.
    import win_local_crawler as wlc
    wlc.main(account_display=account, website_display=website,
             fetcher=fetcher)
def TestWeibo__init__(user, pwd, weibo_com):
    """Manually run every ``sina_weibo.main`` crawl mode with fixed inputs.

    Builds a ComWeiboFetcher when ``weibo_com`` is truthy and a
    CnWeiboFetcher otherwise. If ``check_cookie()`` is falsy, prints
    'login failed.' and calls ``sys.exit()``. Otherwise crawls weibos,
    follows, fans and infos for a fixed uid list, then reposts and
    comments for a fixed list of message urls, all stored under
    ``./file/``. Ends by printing the fetcher's ``n_connections`` and the
    elapsed time in whole seconds.

    :param user: value passed to the fetcher as ``username``.
    :param pwd: value passed to the fetcher as ``password``.
    :param weibo_com: selects the fetcher class and is forwarded to every
        ``sina_weibo.main`` call.
    """
    fetcher_cls = ComWeiboFetcher if weibo_com else CnWeiboFetcher
    fetcher = fetcher_cls(username=user, password=pwd)

    # print(...) with a single argument prints the same text under the
    # Python 2 print statement used elsewhere in this file.
    if not fetcher.check_cookie():
        print('login failed.')
        sys.exit()

    uids = [1000000253, 10057, 10029]
    msg_urls = [
        'http://weibo.com/1000000253/ezC36cq3i6G',
        'http://weibo.com/1713926427/A2V5CENGU',
    ]

    # (progress message, fetch_data value) for crawls keyed by uid ...
    by_uid = (
        ('crawl weibos', 'weibos'),
        ('crawl follows', 'follows'),
        ('crawl fans', 'fans'),
        ('crawl infos', 'infos'),
    )
    # ... and for crawls keyed by message url.
    by_msg_url = (
        ('crawl reposts', 'repost'),
        ('crawl comments', 'comment'),
    )

    began = time.time()

    for message, kind in by_uid:
        print(message)
        sina_weibo.main(fetcher, fetch_data=kind, store_path='./file/',
                        uids=uids, weibo_com=weibo_com)

    for message, kind in by_msg_url:
        print(message)
        sina_weibo.main(fetcher, store_path='./file/', msg_urls=msg_urls,
                        fetch_data=kind, weibo_com=weibo_com)

    elapsed = int(time.time() - began)
    print('finished: # connections: %s, cost time: %s' % (
        fetcher.n_connections, elapsed))