def on_start(self, event):
    """Read the login form and, for Sina Weibo, start a crawl.

    The site is picked from SITE_CHOICES using the current selection of
    ``self.website``; account and password are read from their widgets,
    stripped and encoded as UTF-8.  ``event`` is accepted but never used.

    When either field is empty a warning box is shown and nothing else
    happens.  Twitter and Facebook only produce a 'Not Implemented' box.
    """
    site = SITE_CHOICES[self.website.GetCurrentSelection()]
    user_name = self.account.GetValue().strip().encode('UTF-8')
    secret = self.password.GetValue().strip().encode('UTF-8')

    # encode() always yields a string, so an emptiness test covers the
    # blank-field case completely.
    if not (user_name and secret):
        wx.MessageBox(
            message='Account/Password cannot be blank. Please retry!',
            caption='Warning',
            style=wx.OK | wx.ICON_INFORMATION,
        )
        return

    if site == settings.SINA_WEIBO:
        fetcher = ComWeiboFetcher(
            username=user_name, password=secret, window=self)
        if not fetcher.check_cookie():
            return
        crawler = StartCrawl(
            self.host_fetcher, site, fetcher, self.store_path, self)
        crawler.start()
        # Once the crawler has been started, disable the form panel and
        # enable the log control.
        self.panel.Enable(False)
        self.logs_txt.Enable(True)
    elif site == settings.TWITTER or site == settings.FACEBOOK:
        wx.MessageBox('Not Implemented')
def on_start(self, event):
    """Start crawling with the credentials currently typed into the form.

    ``event`` is not used.  The selected entry of SITE_CHOICES decides what
    happens: Sina Weibo builds a ComWeiboFetcher and, if its check_cookie()
    call returns a truthy value, starts a StartCrawl object; Twitter and
    Facebook just report 'Not Implemented'.
    """
    chosen_site = SITE_CHOICES[self.website.GetCurrentSelection()]
    account_bytes = self.account.GetValue().strip().encode('UTF-8')
    password_bytes = self.password.GetValue().strip().encode('UTF-8')

    missing_field = (not account_bytes) or (not password_bytes)
    if missing_field:
        box_style = wx.OK | wx.ICON_INFORMATION
        wx.MessageBox(
            message='Account/Password cannot be blank. Please retry!',
            caption='Warning',
            style=box_style)
        return

    # login
    if chosen_site == settings.SINA_WEIBO:
        fetcher = ComWeiboFetcher(username=account_bytes,
                                  password=password_bytes,
                                  window=self)
        if fetcher.check_cookie():
            StartCrawl(self.host_fetcher, chosen_site, fetcher,
                       self.store_path, self).start()
            # Form off, log view on, after the crawler has been started.
            self.panel.Enable(False)
            self.logs_txt.Enable(True)
        return

    if chosen_site == settings.TWITTER or chosen_site == settings.FACEBOOK:
        wx.MessageBox('Not Implemented')
def on_login(self, event):
    """Check the login form and hand over to the Windows local crawler.

    ``event`` is not used.  Account and password are read from their
    widgets, stripped and UTF-8 encoded; a warning box is shown when either
    is empty.  For Sina Weibo a ComWeiboFetcher is created and, when its
    check_cookie() call returns a truthy value, this window is destroyed,
    the wx main loop is asked to exit and ``win_local_crawler.main`` is
    called.  Twitter and Facebook only show a "not implemented" box.
    """
    site = SITE_CHOICES[self.website.GetCurrentSelection()]
    user_name = self.account.GetValue().strip().encode("UTF-8")
    secret = self.password.GetValue().strip().encode("UTF-8")
    box_style = wx.OK | wx.ICON_INFORMATION

    if not user_name or not secret:
        wx.MessageBox(
            message="Account/Password cannot be blank. Please retry!",
            caption="Warning",
            style=box_style,
        )
        return

    if site == settings.SINA_WEIBO:
        fetcher = ComWeiboFetcher(username=user_name, password=secret, window=self)
        if not fetcher.check_cookie():
            return
        # Detach the fetcher from this window before the window goes away.
        fetcher.window = None
        self.Destroy()
        wx.GetApp().ExitMainLoop()
        # Imported only at this point, after the login window is torn down.
        import win_local_crawler as wlc

        wlc.main(account_display=user_name, website_display=site, fetcher=fetcher)
    elif site == settings.TWITTER:
        wx.MessageBox(
            message="For Twitter: not implemented. Please retry!",
            caption="Error",
            style=box_style,
        )
    elif site == settings.FACEBOOK:
        wx.MessageBox(
            message="For Facebook: not implemented. Please retry!",
            caption="Error",
            style=box_style,
        )
def on_login(self, event):
    """Validate the login form and hand over to the Windows local crawler.

    ``event`` is accepted but not used.

    Account and password are read from their widgets, stripped and encoded
    as UTF-8.  If either is empty a warning box is shown and the method
    returns.  For weibo.com / weibo.cn the matching fetcher class is
    instantiated; when its check_cookie() call returns a truthy value this
    window is destroyed, the wx main loop is asked to exit and
    ``win_local_crawler.main`` is called with the account, the site and the
    fetcher.  Twitter and Facebook only show a "not implemented" box; any
    other site is ignored.  Always returns None.
    """
    website = SITE_CHOICES[self.website.GetCurrentSelection()]
    account = self.account.GetValue().strip().encode('UTF-8')
    password = self.password.GetValue().strip().encode('UTF-8')

    # encode() never returns None, so only emptiness has to be tested.
    if not account or not password:
        wx.MessageBox(
            message='Account/Password cannot be blank. Please retry!',
            caption='Warning',
            style=wx.OK | wx.ICON_INFORMATION)
        return

    # A single chain picks the fetcher class; the former nested
    # "else: return" could never be reached and is gone.
    if website == settings.COMWEIBO:
        fetcher_cls = ComWeiboFetcher
    elif website == settings.CNWEIBO:
        fetcher_cls = CnWeiboFetcher
    elif website == settings.TWITTER:
        wx.MessageBox(
            message='For Twitter: not implemented. Please retry!',
            caption='Error',
            style=wx.OK | wx.ICON_INFORMATION)
        return
    elif website == settings.FACEBOOK:
        wx.MessageBox(
            message='For Facebook: not implemented. Please retry!',
            caption='Error',
            style=wx.OK | wx.ICON_INFORMATION)
        return
    else:
        return

    fetcher = fetcher_cls(username=account, password=password, window=self)
    if not fetcher.check_cookie():
        return

    # Detach the fetcher from this window before the window is destroyed.
    fetcher.window = None
    self.Destroy()
    wx.GetApp().ExitMainLoop()
    # Kept as a function-level import so the crawler module is loaded only
    # after the login window has been torn down, as before.
    import win_local_crawler as wlc
    wlc.main(account_display=account, website_display=website,
             fetcher=fetcher)
def TestWeibo__init__(user, pwd, weibo_com):
    """Log in and run each sina_weibo.main crawl once, timing the whole run.

    user, pwd -- credentials passed to the fetcher.
    weibo_com -- truthy selects ComWeiboFetcher, falsy CnWeiboFetcher; the
                 value is also forwarded to every sina_weibo.main call.

    Exits the process via sys.exit() when check_cookie() returns a falsy
    value.  Results are stored under "./file/".
    """
    fetcher_cls = ComWeiboFetcher if weibo_com else CnWeiboFetcher
    fetcher = fetcher_cls(username=user, password=pwd)
    if not fetcher.check_cookie():
        print("login failed.")
        sys.exit()

    uids = [1000000253, 10057, 10029]
    msg_urls = ["http://weibo.com/1000000253/ezC36cq3i6G",
                "http://weibo.com/1713926427/A2V5CENGU"]

    # (banner, fetch_data value, keyword that carries the targets, targets)
    jobs = [
        ("crawl weibos", "weibos", "uids", uids),
        ("crawl follows", "follows", "uids", uids),
        ("crawl fans", "fans", "uids", uids),
        ("crawl infos", "infos", "uids", uids),
        ("crawl reposts", "repost", "msg_urls", msg_urls),
        ("crawl comments", "comment", "msg_urls", msg_urls),
    ]

    start = time.time()
    for banner, fetch_data, target_kw, targets in jobs:
        print(banner)
        sina_weibo.main(fetcher, fetch_data=fetch_data, store_path="./file/",
                        weibo_com=weibo_com, **{target_kw: targets})

    cost_time = int(time.time() - start)
    print("finished: # connections: %s, cost time: %s"
          % (fetcher.n_connections, cost_time))
def on_login(self, event):
    """Validate the login form and hand over to the Unix local crawler.

    ``event`` is accepted but not used.

    Account and password are read from their widgets, stripped and encoded
    as UTF-8; a warning box is shown (and nothing else done) when either is
    empty.  For weibo.com / weibo.cn the matching fetcher is created and,
    if its check_cookie() call returns a truthy value, this window is
    destroyed, the wx main loop is asked to exit and
    ``unix_local_crawler.main`` is called.  Twitter and Facebook only show
    a "not implemented" box; other sites are ignored.  Always returns None.
    """
    website = SITE_CHOICES[self.website.GetCurrentSelection()]
    account = self.account.GetValue().strip().encode('UTF-8')
    password = self.password.GetValue().strip().encode('UTF-8')
    info_style = wx.OK | wx.ICON_INFORMATION

    # The encoded values are never None; an emptiness check is sufficient.
    if not (account and password):
        wx.MessageBox(message='Account/Password cannot be blank. Please retry!',
                      caption='Warning',
                      style=info_style)
        return

    if website == settings.TWITTER:
        wx.MessageBox(message='For Twitter: not implemented. Please retry!',
                      caption='Error',
                      style=info_style)
        return
    if website == settings.FACEBOOK:
        wx.MessageBox(message='For Facebook: not implemented. Please retry!',
                      caption='Error',
                      style=info_style)
        return

    # Only the two Weibo sites get a fetcher.  The unreachable inner
    # "else: return" of the earlier version is folded into this chain.
    if website == settings.COMWEIBO:
        make_fetcher = ComWeiboFetcher
    elif website == settings.CNWEIBO:
        make_fetcher = CnWeiboFetcher
    else:
        return

    fetcher = make_fetcher(username=account, password=password, window=self)
    if fetcher.check_cookie():
        # Detach the fetcher from this window before destroying the window.
        fetcher.window = None
        self.Destroy()
        wx.GetApp().ExitMainLoop()
        # Function-level import kept: the module is loaded only after the
        # login window has been torn down.
        import unix_local_crawler as ulc
        ulc.main(account_display=account, website_display=website,
                 fetcher=fetcher)
# encoding: utf-8
from sina_weibo.fetcher import ComWeiboFetcher
import sina_weibo as sw
import sys
import time
import memstorage
import account
from thread_pool import WorkerManager

# Log in with the credentials kept in the account module; stop if the
# cookie check does not succeed.
fetcher = ComWeiboFetcher(username=account.user, password=account.pwd)
login_ok = fetcher.check_cookie()
if not login_ok:
    print('login failed.')
    sys.exit()

# Lists handed to sw.main as uids_storage -- presumably filled with the
# uids found by each crawl (confirm against sina_weibo.main).
fans, follows = [], []
sw.main(
    fetcher,
    fetch_data='follows',
    store_path='./file/',
    uids=memstorage.users_id_moniterd,
    uids_storage=follows,
)
sw.main(
    fetcher,
    fetch_data='fans',
    store_path='./file/',
    uids=memstorage.users_id_moniterd,
    uids_storage=fans,
)

# De-duplicated union of both lists.
friends_list = list(set(fans) | set(follows))
print(friends_list)

# Weibos of the monitored (host) users.
sw.main(
    fetcher,
    fetch_data='weibos',
    store_path='./file/',
    uids=memstorage.users_id_moniterd,
)
# Weibos of the friends: not implemented in this script yet.
def TestComWeiboFetcher(user, pwd):
    """Exercise each ComWeiboFetcher call once and dump the pages to ./file/.

    user, pwd -- credentials passed to ComWeiboFetcher.

    Prints the results of the cookie, user and message checks, then fetches
    one page each of weibos, follows, fans, infos, reposts and comments and
    writes every result to its own UTF-8 text file.  Finishes by printing
    the fetcher's connection counter and the elapsed whole seconds.
    """
    def dump(path, text):
        # Write one fetched page as UTF-8, replacing any existing file.
        with codecs.open(path, "w", "utf-8") as out:
            out.write(text)

    # Other uids that have been used with this test: 3079645245, 1043325954,
    # 1806128454, 1002697421, 3087118795, 3045056321, 3104811705, 2901331743,
    # 1021, 3207638224
    uid = 1039646267
    page = 1
    msg_id = "10036505028"
    msg_url = "http://weibo.com/1000000253/ezC36cq3i6G"

    # -------------------------------------------
    start = time.time()
    fetcher = ComWeiboFetcher(username=user, password=pwd)

    print("test for check cookie weibo.com")
    print(fetcher.check_cookie())

    print("test for check user exist...")
    print(fetcher.check_user(uid))

    print("test for check message exist...")
    print(fetcher.check_message(msg_url))

    print("test for fetch weibo...")
    dump("./file/weibos-%s.txt" % uid, fetcher.fetch_weibo(uid=uid, page=page))

    print("test for fetch follows...")
    follows_url = "http://weibo.com/%s/follow?page=%s" % (uid, page)
    dump("./file/follows-%s.txt" % uid,
         fetcher.fetch(follows_url, settings.QUERY_FOLLOWS))

    print("test for fetch fans...")
    fans_url = "http://weibo.com/%s/fans?page=%s" % (uid, page)
    dump("./file/fans-%s.txt" % uid,
         fetcher.fetch(fans_url, settings.QUERY_FANS))

    print("test for fetch infos...")
    info_url = "http://weibo.com/%s/info" % uid
    dump("./file/infos-%s.txt" % uid,
         fetcher.fetch(info_url, settings.QUERY_INFO))

    print("test for fetch message repost")
    reposts_html, page_cnt = fetcher.fetch_msg_reposts(msg_id, page)
    print("total page: %s" % page_cnt)
    dump("./file/reposts-%s.txt" % msg_id, reposts_html)

    print("test for fetch message comment")
    comments_html, page_cnt = fetcher.fetch_msg_comments(msg_id, page)
    print("total page: %s" % page_cnt)
    dump("./file/comments-%s.txt" % msg_id, comments_html)

    cost_time = int(time.time() - start)
    print("finished: # connections: %s, cost time: %s"
          % (fetcher.n_connections, cost_time))
# encoding: utf-8
from sina_weibo.fetcher import ComWeiboFetcher
import sina_weibo
import sys
import time
import memstorage

# Credentials are taken from the memstorage module rather than being written
# into this script, so no account secret is kept in this file.  The
# module-level names `user` and `pwd` are still defined.
# NOTE(review): relies on memstorage exposing `user` and `pwd` -- confirm.
user = memstorage.user
pwd = memstorage.pwd

# Log in; stop when the cookie check does not succeed.
fetcher = ComWeiboFetcher(username=user, password=pwd)
login_ok = fetcher.check_cookie()
if not login_ok:
    print('login failed.')
    sys.exit()

# Crawl the weibos of the monitored users.  memstorage.weibos_url_moniterd is
# handed over as weibos_storage -- presumably filled by sina_weibo.main with
# what it finds (confirm against sina_weibo.main).
sina_weibo.main(
    fetcher,
    fetch_data='weibos',
    store_path='./file/',
    uids=memstorage.users_id_moniterd,
    weibos_storage=memstorage.weibos_url_moniterd,
)
def TestComWeiboFetcher(user, pwd):
    """Run one request of every kind through ComWeiboFetcher.

    user, pwd -- credentials passed to ComWeiboFetcher.

    The outcome of the cookie / user / message checks is printed; each
    fetched page (weibos, follows, fans, infos, reposts, comments) is
    written as UTF-8 to a file under ./file/.  The last line printed shows
    fetcher.n_connections and the elapsed time in whole seconds.
    """
    # Previously tried uids: 3079645245, 1043325954, 1806128454, 1002697421,
    # 3087118795, 3045056321, 3104811705, 2901331743, 1021, 3207638224
    uid = 1039646267
    page = 1
    msg_id = '10036505028'
    msg_url = 'http://weibo.com/1000000253/ezC36cq3i6G'

    # Output file per kind of page.
    out_files = {
        'weibos': './file/weibos-%s.txt' % (uid),
        'follows': './file/follows-%s.txt' % (uid),
        'fans': './file/fans-%s.txt' % (uid),
        'infos': './file/infos-%s.txt' % (uid),
        'reposts': './file/reposts-%s.txt' % (msg_id),
        'comments': './file/comments-%s.txt' % (msg_id),
    }

    def save(kind, text):
        # Overwrite the file registered for `kind` with `text`, UTF-8 encoded.
        with codecs.open(out_files[kind], 'w', 'utf-8') as handle:
            handle.write(text)

    #-------------------------------------------
    start = time.time()
    fetcher = ComWeiboFetcher(username=user, password=pwd)

    print('test for check cookie weibo.com')
    print(fetcher.check_cookie())
    print('test for check user exist...')
    print(fetcher.check_user(uid))
    print('test for check message exist...')
    print(fetcher.check_message(msg_url))

    print('test for fetch weibo...')
    html = fetcher.fetch_weibo(uid=uid, page=page)
    save('weibos', html)

    print('test for fetch follows...')
    url = 'http://weibo.com/%s/follow?page=%s' % (uid, page)
    html = fetcher.fetch(url, settings.QUERY_FOLLOWS)
    save('follows', html)

    print('test for fetch fans...')
    url = 'http://weibo.com/%s/fans?page=%s' % (uid, page)
    html = fetcher.fetch(url, settings.QUERY_FANS)
    save('fans', html)

    print('test for fetch infos...')
    url = 'http://weibo.com/%s/info' % uid
    html = fetcher.fetch(url, settings.QUERY_INFO)
    save('infos', html)

    print('test for fetch message repost')
    html, page_cnt = fetcher.fetch_msg_reposts(msg_id, page)
    print('total page: %s' % page_cnt)
    save('reposts', html)

    print('test for fetch message comment')
    html, page_cnt = fetcher.fetch_msg_comments(msg_id, page)
    print('total page: %s' % page_cnt)
    save('comments', html)

    elapsed = int(time.time() - start)
    print('finished: # connections: %s, cost time: %s' % (
        fetcher.n_connections, elapsed))
# uid = 3104811705 page = 1 f_weibos = './test/weibos-%s.txt' %(uid) f_follows = './test/follows-%s.txt' %(uid) f_fans = './test/fans-%s.txt' %(uid) f_infos = './test/infos-%s.txt' %(uid) f_reposts = './test/reposts-%s.txt' %(msg_id) f_comments= './test/comments-%s.txt' %(msg_id) user = '' pwd = '' start = time.time() fetcher = ComWeiboFetcher(username=user, password=pwd) print 'test for check user exist...' print fetcher.check_user(uid) print 'test for check message exist...' print fetcher.check_message(msg_url) print 'test for fetch weibo...' html = fetcher.fetch_weibo(uid=uid, page=page) with codecs.open(f_weibos, 'w', 'utf-8') as f: f.write(html) print 'test for fetch follows...' url = 'http://weibo.com/%s/follow?page=%s' % (uid, page) html = fetcher.fetch(url, settings.QUERY_FOLLOWS)
def TestWeibo__init__(user, pwd, weibo_com):
    """Run all six sina_weibo.main crawls against a logged-in fetcher.

    user, pwd -- credentials passed to the fetcher.
    weibo_com -- when truthy a ComWeiboFetcher is used, otherwise a
                 CnWeiboFetcher; also forwarded to sina_weibo.main.

    Calls sys.exit() if check_cookie() returns a falsy value.  Output goes
    to './file/'.  Prints the fetcher's connection counter and the elapsed
    whole seconds at the end.
    """
    if weibo_com:
        fetcher_cls = ComWeiboFetcher
    else:
        fetcher_cls = CnWeiboFetcher
    fetcher = fetcher_cls(username=user, password=pwd)

    if not fetcher.check_cookie():
        print('login failed.')
        sys.exit()

    uids = [1000000253, 10057, 10029]
    msg_urls = [
        'http://weibo.com/1000000253/ezC36cq3i6G',
        'http://weibo.com/1713926427/A2V5CENGU',
    ]
    # Keyword arguments shared by every crawl.
    shared = {'store_path': './file/', 'weibo_com': weibo_com}
    per_user = (
        ('crawl weibos', 'weibos'),
        ('crawl follows', 'follows'),
        ('crawl fans', 'fans'),
        ('crawl infos', 'infos'),
    )
    per_message = (
        ('crawl reposts', 'repost'),
        ('crawl comments', 'comment'),
    )

    start = time.time()
    # User-based crawls first, message-based crawls afterwards.
    for banner, kind in per_user:
        print(banner)
        sina_weibo.main(fetcher, fetch_data=kind, uids=uids, **shared)
    for banner, kind in per_message:
        print(banner)
        sina_weibo.main(fetcher, fetch_data=kind, msg_urls=msg_urls, **shared)

    elapsed = int(time.time() - start)
    print('finished: # connections: %s, cost time: %s' % (
        fetcher.n_connections, elapsed))
# encoding: utf-8
from sina_weibo.fetcher import ComWeiboFetcher
import sina_weibo
import sys
import time
import memstorage

# Log in with the credentials kept in memstorage; stop when the cookie check
# does not succeed.
fetcher = ComWeiboFetcher(username=memstorage.user, password=memstorage.pwd)
login_ok = fetcher.check_cookie()
if not login_ok:
    print('login failed.')
    sys.exit()

start = time.time()

# Step 1: crawl the follows of the monitored users.
# memstorage.uids_url_moniterd is passed as uids_storage -- presumably it is
# filled with the uids that were found (confirm against sina_weibo.main).
sina_weibo.main(
    fetcher,
    fetch_data='follows',
    store_path='./file/',
    uids=memstorage.users_id_moniterd,
    uids_storage=memstorage.uids_url_moniterd,
)

# An earlier revision also crawled 'fans' into memstorage.uids_url2_moniterd
# and intersected both uid sets; that experiment is disabled.

# Step 2: crawl the weibos of every uid in memstorage.uids_url_moniterd.
sina_weibo.main(
    fetcher,
    fetch_data='weibos',
    store_path='./file/',
    uids=memstorage.uids_url_moniterd,
    weibos_storage=memstorage.weibos_url_moniterd,
)
# uid = 3104811705 page = 1 f_weibos = './test/weibos-%s.txt' % (uid) f_follows = './test/follows-%s.txt' % (uid) f_fans = './test/fans-%s.txt' % (uid) f_infos = './test/infos-%s.txt' % (uid) f_reposts = './test/reposts-%s.txt' % (msg_id) f_comments = './test/comments-%s.txt' % (msg_id) user = '' pwd = '' start = time.time() fetcher = ComWeiboFetcher(username=user, password=pwd) print 'test for check user exist...' print fetcher.check_user(uid) print 'test for check message exist...' print fetcher.check_message(msg_url) print 'test for fetch weibo...' html = fetcher.fetch_weibo(uid=uid, page=page) with codecs.open(f_weibos, 'w', 'utf-8') as f: f.write(html) print 'test for fetch follows...' url = 'http://weibo.com/%s/follow?page=%s' % (uid, page) html = fetcher.fetch(url, settings.QUERY_FOLLOWS)
# encoding: utf-8
from sina_weibo.fetcher import ComWeiboFetcher
import sina_weibo as sw
import sys
import time
import memstorage
import account
from thread_pool import WorkerManager

# Log in with the credentials kept in the account module; stop when the
# cookie check does not succeed.
fetcher = ComWeiboFetcher(username=account.user, password=account.pwd)
login_ok = fetcher.check_cookie()
if not login_ok:
    print('login failed.')
    sys.exit()

# Lists handed to sw.main as uids_storage -- presumably filled with the uids
# found by each crawl (confirm against sina_weibo.main).
fans, follows = [], []

# Follows of the monitored users.
sw.main(
    fetcher,
    fetch_data='follows',
    store_path='./file/',
    uids=memstorage.users_id_moniterd,
    uids_storage=follows,
)
# Fans of the monitored users.
sw.main(
    fetcher,
    fetch_data='fans',
    store_path='./file/',
    uids=memstorage.users_id_moniterd,
    uids_storage=fans,
)
def TestComWeiboCrawler(user, pwd):
    """Run every ComWeiboCrawler crawl once for a fixed user and message.

    user, pwd -- credentials passed to ComWeiboFetcher.

    Calls sys.exit() if check_cookie() returns a falsy value.  A fresh
    crawler is created for every step and fetcher.n_connections is set back
    to 0 before each one, so the counter printed at the end only covers what
    happened after the last reset.  Output is stored under './file/'.
    """
    # Other uids that have been used with this test: 1039646267, 3079645245,
    # 1043325954, 1806128454, 1002697421, 3087118795, 3045056321, 3104811705,
    # 2901331743, 1021, 3207638224
    uid = 1000000253
    msg_url = 'http://weibo.com/1000000253/ezC36cq3i6G'
    # msg_id = '10036505028'
    store_path = './file/'

    fetcher = ComWeiboFetcher(username=user, password=pwd)
    start = time.time()
    if not fetcher.check_cookie():
        print('login failed.')
        sys.exit()

    by_user = {'uid': uid}
    by_message = {'msg_url': msg_url}
    # (banner, crawler keyword arguments, crawler method to call)
    steps = (
        ('crawl weibos', by_user, 'crawl_weibos'),
        ('crawl follows', by_user, 'crawl_follows'),
        ('crawl fans', by_user, 'crawl_fans'),
        ('crawl infos', by_user, 'crawl_infos'),
        ('crawl reposts', by_message, 'crawl_msg_reposts'),
        ('crawl comments', by_message, 'crawl_msg_comments'),
    )
    for banner, target, method_name in steps:
        fetcher.n_connections = 0
        print(banner)
        crawler = ComWeiboCrawler(fetcher, store_path, **target)
        getattr(crawler, method_name)()

    cost_time = int(time.time() - start)
    print('finished: # connections: %s, cost time: %s' % (
        fetcher.n_connections, cost_time))
def TestComWeiboCrawler(user, pwd):
    """Log in and call each ComWeiboCrawler crawl method once.

    user, pwd -- credentials passed to ComWeiboFetcher.

    The process exits through sys.exit() when check_cookie() returns a falsy
    value.  Four crawls are made for a fixed uid and two for a fixed message
    URL, all storing under './file/'.  fetcher.n_connections is zeroed
    before every crawl; its final value and the elapsed whole seconds are
    printed at the end.
    """
    # Previously tried uids: 1039646267, 3079645245, 1043325954, 1806128454,
    # 1002697421, 3087118795, 3045056321, 3104811705, 2901331743, 1021,
    # 3207638224
    uid = 1000000253
    msg_url = 'http://weibo.com/1000000253/ezC36cq3i6G'
    # msg_id = '10036505028'
    store_path = './file/'

    fetcher = ComWeiboFetcher(username=user, password=pwd)
    start = time.time()
    login_ok = fetcher.check_cookie()
    if not login_ok:
        print('login failed.')
        sys.exit()

    def run(banner, method_name, **target):
        # One step: reset the counter, announce it, build a crawler for
        # `target` and invoke the named crawl method on it.
        fetcher.n_connections = 0
        print(banner)
        crawler = ComWeiboCrawler(fetcher, store_path, **target)
        getattr(crawler, method_name)()

    run('crawl weibos', 'crawl_weibos', uid=uid)
    run('crawl follows', 'crawl_follows', uid=uid)
    run('crawl fans', 'crawl_fans', uid=uid)
    run('crawl infos', 'crawl_infos', uid=uid)
    run('crawl reposts', 'crawl_msg_reposts', msg_url=msg_url)
    run('crawl comments', 'crawl_msg_comments', msg_url=msg_url)

    elapsed = int(time.time() - start)
    print('finished: # connections: %s, cost time: %s' % (
        fetcher.n_connections, elapsed))