def on_login(self, event):
    """Handle a login request from the login window.

    Reads the selected website and the account/password fields, rejects
    blank credentials with a warning box, and for the two Weibo sites
    builds the matching fetcher. When the fetcher's ``check_cookie()``
    succeeds, this window is destroyed, the wx main loop is asked to exit
    and ``win_local_crawler.main`` is started. Twitter and Facebook only
    show a "not implemented" message box.

    Args:
        event: the event object delivered to the handler (not used).
    """
    website = SITE_CHOICES[self.website.GetCurrentSelection()]
    account = self.account.GetValue().strip().encode('UTF-8')
    password = self.password.GetValue().strip().encode('UTF-8')

    # Guard clause: both fields must be non-empty after stripping.
    if not account or not password:
        wx.MessageBox(
            message='Account/Password cannot be blank. Please retry!',
            caption='Warning',
            style=wx.OK | wx.ICON_INFORMATION)
        return

    if website in [settings.COMWEIBO, settings.CNWEIBO]:
        # Pick the fetcher class first, then construct it once.
        if website == settings.COMWEIBO:
            fetcher_cls = ComWeiboFetcher
        elif website == settings.CNWEIBO:
            fetcher_cls = CnWeiboFetcher
        else:
            return
        fetcher = fetcher_cls(username=account, password=password,
                              window=self)

        if not fetcher.check_cookie():
            return

        # Detach the fetcher from this window before tearing the window
        # down and leaving the main loop.
        fetcher.window = None
        self.Destroy()
        wx.GetApp().ExitMainLoop()

        # Kept as a function-local import, as in the original.
        # NOTE(review): presumably deferred on purpose until the login
        # window is gone -- confirm before moving it to module level.
        import win_local_crawler as wlc
        wlc.main(account_display=account, website_display=website,
                 fetcher=fetcher)
        return

    # Remaining sites are placeholders that only report they are unsupported.
    unsupported_msg = None
    if website == settings.TWITTER:
        unsupported_msg = 'For Twitter: not implemented. Please retry!'
    elif website == settings.FACEBOOK:
        unsupported_msg = 'For Facebook: not implemented. Please retry!'

    if unsupported_msg is not None:
        wx.MessageBox(
            message=unsupported_msg,
            caption='Error',
            style=wx.OK | wx.ICON_INFORMATION)
# encoding: utf-8
# Driver script: check the weibo.com login for one account, then crawl the
# weibos of the monitored users listed in ``memstorage``.
from sina_weibo.fetcher import ComWeiboFetcher
import sina_weibo
import sys
import time
import memstorage

# NOTE(review): credentials are hard-coded in source. Values are left
# untouched here so the script keeps working; consider moving them out of
# the repository (e.g. into the memstorage module or the environment).
user = '******'
pwd = 'ecnupass'

fetcher = ComWeiboFetcher(username=user, password=pwd)

login_ok = fetcher.check_cookie()
if not login_ok:
    print('login failed.')
    # Exit with a non-zero status so callers/shells can detect the failure;
    # a bare sys.exit() would report success (status 0).
    sys.exit(1)

# Crawl weibos for every uid in memstorage.users_id_moniterd, writing under
# ./file/. memstorage.weibos_url_moniterd is handed over as weibos_storage;
# presumably sina_weibo.main fills it with the crawled weibos -- confirm
# against sina_weibo.main.
sina_weibo.main(fetcher,
                fetch_data='weibos',
                store_path='./file/',
                uids=memstorage.users_id_moniterd,
                weibos_storage=memstorage.weibos_url_moniterd)
def TestComWeiboFetcher(user, pwd):
    """Manually exercise every ComWeiboFetcher call once and save the pages.

    Builds a ComWeiboFetcher with the given credentials, then runs the
    cookie/user/message checks and each fetch call for one fixed uid and
    one fixed message, printing progress and writing every returned page
    to a text file under ``./file/``. Finally prints the fetcher's
    connection count and the elapsed whole seconds.

    Args:
        user: passed to ComWeiboFetcher as ``username``.
        pwd: passed to ComWeiboFetcher as ``password``.
    """
    # Other uids that were used for manual runs (kept for reference):
    # 3079645245, 1043325954, 1806128454, 1002697421, 3087118795,
    # 3045056321, 3104811705, 2901331743, 1021, 3207638224
    uid = 1039646267
    page = 1
    msg_id = '10036505028'
    msg_url = 'http://weibo.com/1000000253/ezC36cq3i6G'

    # Output files: the per-user ones are keyed by uid, the per-message
    # ones by msg_id.
    weibos_path = './file/weibos-%s.txt' % uid
    follows_path = './file/follows-%s.txt' % uid
    fans_path = './file/fans-%s.txt' % uid
    infos_path = './file/infos-%s.txt' % uid
    reposts_path = './file/reposts-%s.txt' % msg_id
    comments_path = './file/comments-%s.txt' % msg_id

    def save_page(path, content):
        # Write one fetched page to ``path`` as UTF-8, replacing any old file.
        with codecs.open(path, 'w', 'utf-8') as out:
            out.write(content)

    started_at = time.time()
    fetcher = ComWeiboFetcher(username=user, password=pwd)

    print('test for check cookie weibo.com')
    print(fetcher.check_cookie())

    print('test for check user exist...')
    print(fetcher.check_user(uid))

    print('test for check message exist...')
    print(fetcher.check_message(msg_url))

    print('test for fetch weibo...')
    save_page(weibos_path, fetcher.fetch_weibo(uid=uid, page=page))

    print('test for fetch follows...')
    follows_url = 'http://weibo.com/%s/follow?page=%s' % (uid, page)
    save_page(follows_path, fetcher.fetch(follows_url, settings.QUERY_FOLLOWS))

    print('test for fetch fans...')
    fans_url = 'http://weibo.com/%s/fans?page=%s' % (uid, page)
    save_page(fans_path, fetcher.fetch(fans_url, settings.QUERY_FANS))

    print('test for fetch infos...')
    info_url = 'http://weibo.com/%s/info' % uid
    save_page(infos_path, fetcher.fetch(info_url, settings.QUERY_INFO))

    print('test for fetch message repost')
    reposts_html, reposts_pages = fetcher.fetch_msg_reposts(msg_id, page)
    print('total page: %s' % reposts_pages)
    save_page(reposts_path, reposts_html)

    print('test for fetch message comment')
    comments_html, comments_pages = fetcher.fetch_msg_comments(msg_id, page)
    print('total page: %s' % comments_pages)
    save_page(comments_path, comments_html)

    elapsed = int(time.time() - started_at)
    print('finished: # connections: %s, cost time: %s' % (
        fetcher.n_connections, elapsed))
def TestWeibo__init__(user, pwd, weibo_com):
    """Run every ``sina_weibo.main`` crawl mode once against fixed samples.

    Builds a ComWeiboFetcher (``weibo_com`` truthy) or a CnWeiboFetcher
    (falsy), verifies the login via ``check_cookie()``, then crawls weibos,
    follows, fans and infos for a fixed uid list and reposts/comments for a
    fixed message-url list, storing results under ``./file/``. Prints the
    fetcher's connection count and the elapsed whole seconds at the end.

    Args:
        user: passed to the fetcher as ``username``.
        pwd: passed to the fetcher as ``password``.
        weibo_com: selects the fetcher class and is forwarded unchanged to
            every ``sina_weibo.main`` call.

    Raises:
        SystemExit: with status 1 when ``check_cookie()`` reports failure.
    """
    fetcher_cls = ComWeiboFetcher if weibo_com else CnWeiboFetcher
    fetcher = fetcher_cls(username=user, password=pwd)

    if not fetcher.check_cookie():
        print('login failed.')
        # Non-zero status: a bare sys.exit() would signal success (0) even
        # though nothing was crawled.
        sys.exit(1)

    uids = [1000000253, 10057, 10029]
    msg_urls = [
        'http://weibo.com/1000000253/ezC36cq3i6G',
        'http://weibo.com/1713926427/A2V5CENGU'
    ]

    start = time.time()

    # Per-user crawls: the printed label equals the fetch_data value.
    for kind in ('weibos', 'follows', 'fans', 'infos'):
        print('crawl %s' % kind)
        sina_weibo.main(fetcher,
                        fetch_data=kind,
                        store_path='./file/',
                        uids=uids,
                        weibo_com=weibo_com)

    # Per-message crawls: fetch_data is singular ('repost'/'comment') while
    # the printed label is plural.
    for kind in ('repost', 'comment'):
        print('crawl %ss' % kind)
        sina_weibo.main(fetcher,
                        store_path='./file/',
                        msg_urls=msg_urls,
                        fetch_data=kind,
                        weibo_com=weibo_com)

    cost_time = int(time.time() - start)
    print('finished: # connections: %s, cost time: %s' % (
        fetcher.n_connections, cost_time))
# encoding: utf-8
# Driver script: check the weibo.com login with the account stored in
# ``memstorage``, crawl the follows of the monitored users, then crawl the
# weibos of the uids in memstorage.uids_url_moniterd.
from sina_weibo.fetcher import ComWeiboFetcher
import sina_weibo
import sys
import time
import memstorage

fetcher = ComWeiboFetcher(username=memstorage.user, password=memstorage.pwd)

login_ok = fetcher.check_cookie()
if not login_ok:
    print('login failed.')
    # Exit with a non-zero status so the failure is visible to the caller;
    # a bare sys.exit() would report success (status 0).
    sys.exit(1)

start = time.time()

# Pass 1: crawl follows of memstorage.users_id_moniterd.
# memstorage.uids_url_moniterd is handed over as uids_storage; presumably
# sina_weibo.main fills it with the discovered uids -- confirm.
sina_weibo.main(fetcher,
                fetch_data='follows',
                store_path='./file/',
                uids=memstorage.users_id_moniterd,
                uids_storage=memstorage.uids_url_moniterd)

# Disabled alternative (kept from the original): also crawl fans into a
# second storage and intersect the two uid collections.
#sina_weibo.main(fetcher, fetch_data='fans', store_path='./file/', uids=memstorage.users_id_moniterd, uids_storage=memstorage.uids_url2_moniterd)
#a = set(memstorage.uids_url_moniterd)
#b = set(memstorage.uids_url2_moniterd)
#print a & b
#c = list(a&b)

# Pass 2: crawl weibos for the uids in memstorage.uids_url_moniterd (the
# same object given to pass 1 as uids_storage).
sina_weibo.main(fetcher,
                fetch_data='weibos',
                store_path='./file/',
                uids=memstorage.uids_url_moniterd,
                weibos_storage=memstorage.weibos_url_moniterd)