def on_start(self, event):
    """Validate the login form and, for Sina Weibo, start the crawl.

    The selected site is looked up in ``SITE_CHOICES``; the account and
    password fields are stripped and UTF-8 encoded. If either credential is
    empty, a warning dialog is shown and the handler returns.

    For ``settings.SINA_WEIBO`` a ``ComWeiboFetcher`` is built and, when its
    ``check_cookie()`` returns a true value, a ``StartCrawl`` is started,
    ``self.panel`` is disabled and ``self.logs_txt`` is enabled. For
    ``settings.TWITTER`` and ``settings.FACEBOOK`` only a 'Not Implemented'
    dialog is shown.

    The ``event`` argument is not used.
    """
    # NOTE(review): GetCurrentSelection() can return -1 (wx.NOT_FOUND) when
    # nothing is selected, which would index SITE_CHOICES from the end --
    # confirm the control always has a selection.
    site = SITE_CHOICES[self.website.GetCurrentSelection()]
    user = self.account.GetValue().strip().encode('UTF-8')
    secret = self.password.GetValue().strip().encode('UTF-8')

    # Both credentials must be non-empty before anything is attempted.
    if not (user and secret):
        wx.MessageBox(message='Account/Password cannot be blank. Please retry!',
                      caption='Warning', style=wx.OK | wx.ICON_INFORMATION)
        return

    if site == settings.SINA_WEIBO:
        fetcher = ComWeiboFetcher(username=user, password=secret, window=self)
        if not fetcher.check_cookie():
            return

        worker = StartCrawl(self.host_fetcher, site, fetcher,
                            self.store_path, self)
        worker.start()
        self.panel.Enable(False)
        self.logs_txt.Enable(True)
        return

    if site == settings.TWITTER or site == settings.FACEBOOK:
        wx.MessageBox('Not Implemented')
예제 #2
0
    def on_start(self, event):
        """Start a crawl for the site chosen in the form.

        Reads the selected entry of ``SITE_CHOICES`` and the stripped,
        UTF-8 encoded account and password. With both credentials present:

        * ``settings.SINA_WEIBO``: create a ``ComWeiboFetcher``; if its
          ``check_cookie()`` is true, start a ``StartCrawl``, disable
          ``self.panel`` and enable ``self.logs_txt``.
        * ``settings.TWITTER`` / ``settings.FACEBOOK``: show a
          'Not Implemented' dialog.

        With a blank account or password a warning dialog is shown instead.
        The ``event`` argument is not used.
        """
        chosen_site = SITE_CHOICES[self.website.GetCurrentSelection()]
        login_name = self.account.GetValue().strip().encode('UTF-8')
        login_pass = self.password.GetValue().strip().encode('UTF-8')

        have_credentials = bool(login_name) and bool(login_pass)
        if have_credentials:
            if chosen_site == settings.SINA_WEIBO:
                fetcher = ComWeiboFetcher(
                    username=login_name, password=login_pass, window=self)

                if fetcher.check_cookie():
                    crawl_job = StartCrawl(
                        self.host_fetcher, chosen_site, fetcher,
                        self.store_path, self)
                    crawl_job.start()
                    self.panel.Enable(False)
                    self.logs_txt.Enable(True)
            elif chosen_site == settings.TWITTER:
                wx.MessageBox('Not Implemented')
            elif chosen_site == settings.FACEBOOK:
                wx.MessageBox('Not Implemented')
        else:
            wx.MessageBox(
                message='Account/Password cannot be blank. Please retry!',
                caption='Warning',
                style=wx.OK | wx.ICON_INFORMATION)
예제 #3
0
    def on_login(self, event):
        """Log in to the selected site and hand over to the local crawler.

        The account and password fields are stripped and UTF-8 encoded; a
        warning dialog is shown if either is empty.

        For ``settings.SINA_WEIBO`` a ``ComWeiboFetcher`` is created. When its
        ``check_cookie()`` returns a true value, the fetcher's ``window`` is
        cleared, this window is destroyed, the wx main loop is asked to exit
        and ``win_local_crawler.main`` is called with the account, site and
        fetcher. Twitter and Facebook show a "not implemented" error dialog.

        The ``event`` argument is not used.
        """
        site = SITE_CHOICES[self.website.GetCurrentSelection()]
        user = self.account.GetValue().strip().encode("UTF-8")
        secret = self.password.GetValue().strip().encode("UTF-8")

        if not (user and secret):
            wx.MessageBox(
                message="Account/Password cannot be blank. Please retry!",
                caption="Warning",
                style=wx.OK | wx.ICON_INFORMATION,
            )
            return

        if site == settings.SINA_WEIBO:
            fetcher = ComWeiboFetcher(username=user, password=secret, window=self)
            if not fetcher.check_cookie():
                return

            # Detach this window from the fetcher before the window goes away.
            fetcher.window = None
            self.Destroy()
            wx.GetApp().ExitMainLoop()

            # Imported only here, after the login main loop was told to exit.
            import win_local_crawler as wlc

            wlc.main(account_display=user, website_display=site, fetcher=fetcher)
            return

        if site == settings.TWITTER:
            unsupported = "For Twitter: not implemented. Please retry!"
        elif site == settings.FACEBOOK:
            unsupported = "For Facebook: not implemented. Please retry!"
        else:
            return

        wx.MessageBox(
            message=unsupported,
            caption="Error",
            style=wx.OK | wx.ICON_INFORMATION,
        )
예제 #4
0
    def on_login(self, event):
        """Log in to the selected Weibo site and hand over to the crawler.

        The account and password fields are stripped and UTF-8 encoded; a
        warning dialog is shown and the handler returns if either is empty.

        For ``settings.COMWEIBO`` / ``settings.CNWEIBO`` the matching fetcher
        class (``ComWeiboFetcher`` / ``CnWeiboFetcher``) is instantiated. When
        its ``check_cookie()`` returns a true value, the fetcher's ``window``
        is cleared, this window is destroyed, the wx main loop is asked to
        exit and ``win_local_crawler.main`` is called with the account, site
        and fetcher. Twitter and Facebook show a "not implemented" dialog.

        The ``event`` argument is not used.
        """
        website = SITE_CHOICES[self.website.GetCurrentSelection()]
        account = self.account.GetValue().strip().encode('UTF-8')
        password = self.password.GetValue().strip().encode('UTF-8')

        # strip().encode() always yields a string object, never None, so
        # emptiness is the only condition that can actually occur here.
        if not account or not password:
            wx.MessageBox(
                message='Account/Password cannot be blank. Please retry!',
                caption='Warning',
                style=wx.OK | wx.ICON_INFORMATION)
            return

        if website in [settings.COMWEIBO, settings.CNWEIBO]:
            # Membership was just established, so anything that is not
            # COMWEIBO is CNWEIBO; a third "else" case cannot be reached.
            if website == settings.COMWEIBO:
                fetcher_cls = ComWeiboFetcher
            else:
                fetcher_cls = CnWeiboFetcher
            fetcher = fetcher_cls(username=account,
                                  password=password,
                                  window=self)

            if fetcher.check_cookie():
                # Detach this window from the fetcher before destroying it.
                fetcher.window = None

                self.Destroy()
                wx.GetApp().ExitMainLoop()

                # Imported only here, after the main loop was told to exit.
                import win_local_crawler as wlc
                wlc.main(account_display=account,
                         website_display=website,
                         fetcher=fetcher)
        elif website == settings.TWITTER:
            wx.MessageBox(
                message='For Twitter: not implemented. Please retry!',
                caption='Error',
                style=wx.OK | wx.ICON_INFORMATION)
        elif website == settings.FACEBOOK:
            wx.MessageBox(
                message='For Facebook: not implemented. Please retry!',
                caption='Error',
                style=wx.OK | wx.ICON_INFORMATION)
예제 #5
0
def TestWeibo__init__(user, pwd, weibo_com):
    """Log in and run each ``sina_weibo.main`` crawl mode once.

    Args:
        user: account name passed to the fetcher as ``username``.
        pwd: password passed to the fetcher as ``password``.
        weibo_com: when true a ``ComWeiboFetcher`` is used, otherwise a
            ``CnWeiboFetcher``; the flag is also forwarded to every
            ``sina_weibo.main`` call.

    Prints "login failed." and calls ``sys.exit()`` when ``check_cookie()``
    is false. Otherwise crawls weibos, follows, fans and infos for a fixed
    uid list, then reposts and comments for a fixed list of message URLs,
    storing under "./file/", and finally prints the fetcher's
    ``n_connections`` and the elapsed whole seconds.
    """
    fetcher_cls = ComWeiboFetcher if weibo_com else CnWeiboFetcher
    fetcher = fetcher_cls(username=user, password=pwd)

    if not fetcher.check_cookie():
        print("login failed.")
        sys.exit()

    uids = [1000000253, 10057, 10029]
    msg_urls = [
        "http://weibo.com/1000000253/ezC36cq3i6G",
        "http://weibo.com/1713926427/A2V5CENGU",
    ]

    # (progress line, fetch_data value) for the uid-driven crawls ...
    user_jobs = (
        ("crawl weibos", "weibos"),
        ("crawl follows", "follows"),
        ("crawl fans", "fans"),
        ("crawl infos", "infos"),
    )
    # ... and for the message-URL-driven crawls.
    message_jobs = (
        ("crawl reposts", "repost"),
        ("crawl comments", "comment"),
    )

    started = time.time()

    for banner, kind in user_jobs:
        print(banner)
        sina_weibo.main(fetcher, fetch_data=kind, store_path="./file/",
                        uids=uids, weibo_com=weibo_com)

    for banner, kind in message_jobs:
        print(banner)
        sina_weibo.main(fetcher, store_path="./file/", msg_urls=msg_urls,
                        fetch_data=kind, weibo_com=weibo_com)

    elapsed = int(time.time() - started)
    print("finished: # connections: %s, cost time: %s" % (fetcher.n_connections, elapsed))
 def on_login(self, event):
     """Log in to the selected Weibo site and hand over to the crawler.

     The account and password fields are stripped and UTF-8 encoded; a
     warning dialog is shown and the handler returns if either is empty.

     For ``settings.COMWEIBO`` / ``settings.CNWEIBO`` the matching fetcher
     class (``ComWeiboFetcher`` / ``CnWeiboFetcher``) is instantiated. When
     its ``check_cookie()`` returns a true value, the fetcher's ``window``
     is cleared, this window is destroyed, the wx main loop is asked to
     exit and ``unix_local_crawler.main`` is called with the account, site
     and fetcher. Twitter and Facebook show a "not implemented" dialog.

     The ``event`` argument is not used.
     """
     website = SITE_CHOICES[self.website.GetCurrentSelection()]
     account = self.account.GetValue().strip().encode('UTF-8')
     password = self.password.GetValue().strip().encode('UTF-8')

     # strip().encode() always yields a string object, never None, so
     # emptiness is the only condition that can actually occur here.
     if not account or not password:
         wx.MessageBox(message='Account/Password cannot be blank. Please retry!',
                       caption='Warning', style=wx.OK | wx.ICON_INFORMATION)
         return

     if website in [settings.COMWEIBO, settings.CNWEIBO]:
         # Membership was just established, so anything that is not
         # COMWEIBO is CNWEIBO; a third "else" case cannot be reached.
         if website == settings.COMWEIBO:
             fetcher_cls = ComWeiboFetcher
         else:
             fetcher_cls = CnWeiboFetcher
         fetcher = fetcher_cls(username=account,
                               password=password,
                               window=self)

         if fetcher.check_cookie():
             # Detach this window from the fetcher before destroying it.
             fetcher.window = None

             self.Destroy()
             wx.GetApp().ExitMainLoop()

             # Imported only here, after the main loop was told to exit.
             import unix_local_crawler as ulc
             ulc.main(account_display=account, website_display=website,
                      fetcher=fetcher)
     elif website == settings.TWITTER:
         wx.MessageBox(message='For Twitter: not implemented. Please retry!',
                       caption='Error', style=wx.OK | wx.ICON_INFORMATION)
     elif website == settings.FACEBOOK:
         wx.MessageBox(message='For Facebook: not implemented. Please retry!',
                       caption='Error', style=wx.OK | wx.ICON_INFORMATION)
예제 #7
0
파일: demo.py 프로젝트: j0x7c4/Labrador
# encoding: utf-8


from sina_weibo.fetcher import ComWeiboFetcher
import sina_weibo as sw
import sys
import time
import memstorage
import account
from thread_pool import WorkerManager

# Script body: log in with the credentials from the `account` module, then
# run the follows, fans and weibos crawls for the monitored uids.
fetcher = ComWeiboFetcher(username=account.user, password=account.pwd)

login_ok = fetcher.check_cookie()

# Stop the script when check_cookie() reports a falsy result.
if not login_ok:
    print 'login failed.'
    sys.exit()

# Passed to sw.main as `uids_storage`; presumably filled in place by the
# crawl -- confirm against sina_weibo.main.
fans = []
follows = []

sw.main(fetcher, fetch_data='follows', store_path='./file/', uids=memstorage.users_id_moniterd, uids_storage=follows)
sw.main(fetcher, fetch_data='fans', store_path='./file/', uids=memstorage.users_id_moniterd, uids_storage=fans)

# De-duplicated union of both lists (set union, so order is not preserved).
friends_list = list(set(fans)|set(follows))

print friends_list
# Crawl the weibos of the monitored (host) uids.
sw.main(fetcher,fetch_data='weibos',store_path='./file/',uids=memstorage.users_id_moniterd)
# Friends' weibos: no code follows this marker in the visible excerpt.
예제 #8
0
def TestComWeiboFetcher(user, pwd):
    """Exercise each ``ComWeiboFetcher`` call once and dump the pages.

    Args:
        user: account name passed to the fetcher as ``username``.
        pwd: password passed to the fetcher as ``password``.

    Prints the results of ``check_cookie``, ``check_user`` and
    ``check_message``, then fetches the weibo, follows, fans and info pages
    of one fixed uid and the repost/comment pages of one fixed message,
    writing each result as UTF-8 under "./file/". Ends by printing the
    fetcher's ``n_connections`` and the elapsed whole seconds.
    """
    uid = 1039646267
    # Other uids previously swapped in here by hand:
    #   3079645245, 1043325954, 1806128454, 1002697421, 3087118795,
    #   3045056321, 3104811705, 2901331743, 1021, 3207638224
    page = 1

    msg_id = "10036505028"
    msg_url = "http://weibo.com/1000000253/ezC36cq3i6G"

    # Output file per kind of fetched page.
    paths = {
        "weibos": "./file/weibos-%s.txt" % (uid),
        "follows": "./file/follows-%s.txt" % (uid),
        "fans": "./file/fans-%s.txt" % (uid),
        "infos": "./file/infos-%s.txt" % (uid),
        "reposts": "./file/reposts-%s.txt" % (msg_id),
        "comments": "./file/comments-%s.txt" % (msg_id),
    }

    def dump(path, text):
        # Overwrite `path` with `text`, encoded as UTF-8.
        with codecs.open(path, "w", "utf-8") as sink:
            sink.write(text)

    # -------------------------------------------
    began = time.time()

    fetcher = ComWeiboFetcher(username=user, password=pwd)

    print("test for check cookie weibo.com")
    print(fetcher.check_cookie())

    print("test for check user exist...")
    print(fetcher.check_user(uid))

    print("test for check message exist...")
    print(fetcher.check_message(msg_url))

    print("test for fetch weibo...")
    dump(paths["weibos"], fetcher.fetch_weibo(uid=uid, page=page))

    print("test for fetch follows...")
    follows_url = "http://weibo.com/%s/follow?page=%s" % (uid, page)
    dump(paths["follows"], fetcher.fetch(follows_url, settings.QUERY_FOLLOWS))

    print("test for fetch fans...")
    fans_url = "http://weibo.com/%s/fans?page=%s" % (uid, page)
    dump(paths["fans"], fetcher.fetch(fans_url, settings.QUERY_FANS))

    print("test for fetch infos...")
    info_url = "http://weibo.com/%s/info" % uid
    dump(paths["infos"], fetcher.fetch(info_url, settings.QUERY_INFO))

    print("test for fetch message repost")
    repost_html, repost_pages = fetcher.fetch_msg_reposts(msg_id, page)
    print("total page: %s" % repost_pages)
    dump(paths["reposts"], repost_html)

    print("test for fetch message comment")
    comment_html, comment_pages = fetcher.fetch_msg_comments(msg_id, page)
    print("total page: %s" % comment_pages)
    dump(paths["comments"], comment_html)

    elapsed = int(time.time() - began)

    print("finished: # connections: %s, cost time: %s" % (fetcher.n_connections, elapsed))
예제 #9
0
# encoding: utf-8

from sina_weibo.fetcher import ComWeiboFetcher
import sina_weibo
import sys
import time
import memstorage

# Script body: log in and crawl the weibos of the monitored uids.
#
# NOTE(review): the password below is hard-coded in source (the user name
# appears to be masked already). Credentials committed to a file should be
# rotated and supplied from outside the source tree instead.
user = '******'
pwd = 'ecnupass'
fetcher = ComWeiboFetcher(username=user, password=pwd)

login_ok = fetcher.check_cookie()

# Stop the script when check_cookie() reports a falsy result.
if not login_ok:
    print 'login failed.'
    sys.exit()

# `weibos_storage` receives a memstorage attribute; presumably the crawl
# records results into it -- confirm against sina_weibo.main.
sina_weibo.main(fetcher,
                fetch_data='weibos',
                store_path='./file/',
                uids=memstorage.users_id_moniterd,
                weibos_storage=memstorage.weibos_url_moniterd)
예제 #10
0
def TestComWeiboFetcher(user, pwd):
    """Run every ``ComWeiboFetcher`` call once and save the fetched pages.

    Args:
        user: account name passed to the fetcher as ``username``.
        pwd: password passed to the fetcher as ``password``.

    Prints the results of ``check_cookie``, ``check_user`` and
    ``check_message``; fetches the weibo, follows, fans and info pages of a
    fixed uid plus the repost and comment pages of a fixed message; writes
    each page as UTF-8 under './file/'; and finally prints the fetcher's
    ``n_connections`` and the elapsed whole seconds.
    """
    uid = 1039646267
    # Alternative uids that were used here at some point:
    #   3079645245, 1043325954, 1806128454, 1002697421, 3087118795,
    #   3045056321, 3104811705, 2901331743, 1021, 3207638224
    page = 1

    msg_id = '10036505028'
    msg_url = 'http://weibo.com/1000000253/ezC36cq3i6G'

    weibos_file = './file/weibos-%s.txt' % (uid)
    follows_file = './file/follows-%s.txt' % (uid)
    fans_file = './file/fans-%s.txt' % (uid)
    infos_file = './file/infos-%s.txt' % (uid)
    reposts_file = './file/reposts-%s.txt' % (msg_id)
    comments_file = './file/comments-%s.txt' % (msg_id)

    # (progress line, url, settings attribute holding the query, output file)
    listing_steps = (
        ('test for fetch follows...',
         'http://weibo.com/%s/follow?page=%s' % (uid, page),
         'QUERY_FOLLOWS', follows_file),
        ('test for fetch fans...',
         'http://weibo.com/%s/fans?page=%s' % (uid, page),
         'QUERY_FANS', fans_file),
        ('test for fetch infos...',
         'http://weibo.com/%s/info' % uid,
         'QUERY_INFO', infos_file),
    )
    # (progress line, fetcher method name, output file)
    message_steps = (
        ('test for fetch message repost', 'fetch_msg_reposts', reposts_file),
        ('test for fetch message comment', 'fetch_msg_comments',
         comments_file),
    )

    def save(target, text):
        # Overwrite `target` with `text`, encoded as UTF-8.
        with codecs.open(target, 'w', 'utf-8') as handle:
            handle.write(text)

    #-------------------------------------------
    t0 = time.time()

    fetcher = ComWeiboFetcher(username=user, password=pwd)

    print('test for check cookie weibo.com')
    print(fetcher.check_cookie())

    print('test for check user exist...')
    print(fetcher.check_user(uid))

    print('test for check message exist...')
    print(fetcher.check_message(msg_url))

    print('test for fetch weibo...')
    save(weibos_file, fetcher.fetch_weibo(uid=uid, page=page))

    for banner, url, query_attr, target in listing_steps:
        print(banner)
        # The settings attribute is looked up only when its step runs.
        save(target, fetcher.fetch(url, getattr(settings, query_attr)))

    for banner, method_name, target in message_steps:
        print(banner)
        html, page_cnt = getattr(fetcher, method_name)(msg_id, page)
        print('total page: %s' % page_cnt)
        save(target, html)

    cost_time = int(time.time() - t0)

    print('finished: # connections: %s, cost time: %s' % (
        fetcher.n_connections, cost_time))
예제 #11
0
# Excerpt of a fetcher test script. `uid`, `msg_id`, `msg_url` and the
# imports used below are defined outside the visible part of the file.
# uid = 3104811705
page = 1

# Output files, named after the uid / message id being fetched.
f_weibos  = './test/weibos-%s.txt'   %(uid)
f_follows = './test/follows-%s.txt'  %(uid)
f_fans    = './test/fans-%s.txt'     %(uid)
f_infos   = './test/infos-%s.txt'    %(uid)
f_reposts = './test/reposts-%s.txt'  %(msg_id)
f_comments= './test/comments-%s.txt' %(msg_id)

# Credentials are left blank here and must be filled in before running.
user = ''
pwd  = ''

start = time.time()

fetcher = ComWeiboFetcher(username=user, password=pwd)

print 'test for check user exist...'
print fetcher.check_user(uid)

print 'test for check message exist...'
print fetcher.check_message(msg_url)

# Fetch one page of the user's weibos and write it out as UTF-8.
print 'test for fetch weibo...'
html = fetcher.fetch_weibo(uid=uid, page=page)
with codecs.open(f_weibos, 'w', 'utf-8') as f:
    f.write(html)

# The excerpt ends after this fetch; the result is not written in view.
print 'test for fetch follows...'
url = 'http://weibo.com/%s/follow?page=%s' % (uid, page)
html = fetcher.fetch(url, settings.QUERY_FOLLOWS)
예제 #12
0
def TestWeibo__init__(user, pwd, weibo_com):
    """Log in, then drive ``sina_weibo.main`` through six crawl modes.

    Args:
        user: account name passed to the fetcher as ``username``.
        pwd: password passed to the fetcher as ``password``.
        weibo_com: selects ``ComWeiboFetcher`` when true and
            ``CnWeiboFetcher`` otherwise; also forwarded to each
            ``sina_weibo.main`` call.

    Prints 'login failed.' and calls ``sys.exit()`` when ``check_cookie()``
    is false. Otherwise runs the weibos, follows, fans and infos crawls for
    a fixed uid list and the repost and comment crawls for a fixed list of
    message URLs, storing under './file/', then prints the fetcher's
    ``n_connections`` and the elapsed whole seconds.
    """
    fetcher_type = ComWeiboFetcher if weibo_com else CnWeiboFetcher
    fetcher = fetcher_type(username=user, password=pwd)

    if not fetcher.check_cookie():
        print('login failed.')
        sys.exit()

    uids = [1000000253, 10057, 10029]
    msg_urls = [
        'http://weibo.com/1000000253/ezC36cq3i6G',
        'http://weibo.com/1713926427/A2V5CENGU'
    ]

    def crawl(banner, **job):
        # Print the progress line, then run one crawl with the shared
        # fetcher, store path and weibo_com flag plus the job's arguments.
        print(banner)
        sina_weibo.main(fetcher,
                        store_path='./file/',
                        weibo_com=weibo_com,
                        **job)

    t0 = time.time()

    crawl('crawl weibos', fetch_data='weibos', uids=uids)
    crawl('crawl follows', fetch_data='follows', uids=uids)
    crawl('crawl fans', fetch_data='fans', uids=uids)
    crawl('crawl infos', fetch_data='infos', uids=uids)
    crawl('crawl reposts', fetch_data='repost', msg_urls=msg_urls)
    crawl('crawl comments', fetch_data='comment', msg_urls=msg_urls)

    seconds = int(time.time() - t0)
    print('finished: # connections: %s, cost time: %s' % (
        fetcher.n_connections, seconds))
예제 #13
0
# encoding: utf-8


from sina_weibo.fetcher import ComWeiboFetcher
import sina_weibo
import sys
import time
import memstorage


# Script body: log in with the credentials held in `memstorage`, crawl the
# follows of the monitored uids, then crawl weibos for the uids in
# memstorage.uids_url_moniterd.
fetcher = ComWeiboFetcher(username=memstorage.user, password=memstorage.pwd)

login_ok = fetcher.check_cookie()

# Stop the script when check_cookie() reports a falsy result.
if not login_ok:
    print 'login failed.'
    sys.exit()

# NOTE(review): `start` is assigned but not read in the visible code.
start = time.time()

# `uids_storage` receives memstorage.uids_url_moniterd, the same object the
# final call reads its uids from; presumably this crawl fills it -- confirm.
sina_weibo.main(fetcher, fetch_data='follows', store_path='./file/', uids=memstorage.users_id_moniterd, uids_storage=memstorage.uids_url_moniterd)

# Disabled: a fans crawl and an intersection of follows and fans.
#sina_weibo.main(fetcher, fetch_data='fans', store_path='./file/', uids=memstorage.users_id_moniterd, uids_storage=memstorage.uids_url2_moniterd)

#a = set(memstorage.uids_url_moniterd)
#b = set(memstorage.uids_url2_moniterd)

#print a & b

#c = list(a&b)
sina_weibo.main(fetcher, fetch_data='weibos', store_path='./file/', uids=memstorage.uids_url_moniterd, weibos_storage=memstorage.weibos_url_moniterd)
예제 #14
0
# Excerpt of a fetcher test script. `uid`, `msg_id`, `msg_url` and the
# imports used below are defined outside the visible part of the file.
# uid = 3104811705
page = 1

# Output files, named after the uid / message id being fetched.
f_weibos = './test/weibos-%s.txt' % (uid)
f_follows = './test/follows-%s.txt' % (uid)
f_fans = './test/fans-%s.txt' % (uid)
f_infos = './test/infos-%s.txt' % (uid)
f_reposts = './test/reposts-%s.txt' % (msg_id)
f_comments = './test/comments-%s.txt' % (msg_id)

# Credentials are left blank here and must be filled in before running.
user = ''
pwd = ''

start = time.time()

fetcher = ComWeiboFetcher(username=user, password=pwd)

print 'test for check user exist...'
print fetcher.check_user(uid)

print 'test for check message exist...'
print fetcher.check_message(msg_url)

# Fetch one page of the user's weibos and write it out as UTF-8.
print 'test for fetch weibo...'
html = fetcher.fetch_weibo(uid=uid, page=page)
with codecs.open(f_weibos, 'w', 'utf-8') as f:
    f.write(html)

# The excerpt ends after this fetch; the result is not written in view.
print 'test for fetch follows...'
url = 'http://weibo.com/%s/follow?page=%s' % (uid, page)
html = fetcher.fetch(url, settings.QUERY_FOLLOWS)
예제 #15
0
파일: demo.py 프로젝트: j0x7c4/Labrador
# encoding: utf-8

from sina_weibo.fetcher import ComWeiboFetcher
import sina_weibo as sw
import sys
import time
import memstorage
import account
from thread_pool import WorkerManager

# Script body: log in with the credentials from the `account` module, then
# run the follows and fans crawls for the monitored uids.
fetcher = ComWeiboFetcher(username=account.user, password=account.pwd)

login_ok = fetcher.check_cookie()

# Stop the script when check_cookie() reports a falsy result.
if not login_ok:
    print 'login failed.'
    sys.exit()

# Passed to sw.main as `uids_storage`; presumably filled in place by the
# crawl -- confirm against sina_weibo.main.
fans = []
follows = []

sw.main(fetcher,
        fetch_data='follows',
        store_path='./file/',
        uids=memstorage.users_id_moniterd,
        uids_storage=follows)
sw.main(fetcher,
        fetch_data='fans',
        store_path='./file/',
        uids=memstorage.users_id_moniterd,
        uids_storage=fans)
예제 #16
0
def TestComWeiboCrawler(user, pwd):
    """Log in and run each ``ComWeiboCrawler`` crawl method once.

    Args:
        user: account name passed to the fetcher as ``username``.
        pwd: password passed to the fetcher as ``password``.

    Prints 'login failed.' and calls ``sys.exit()`` when ``check_cookie()``
    is false. Otherwise, for each step, the fetcher's ``n_connections`` is
    reset to 0, a new crawler is built for either the fixed uid or the fixed
    message URL, and one crawl method is called. Results are stored under
    './file/'.

    NOTE(review): because the counter is reset before every step, the
    connection count printed at the end covers only the last crawl, while
    the printed time covers the whole run -- confirm this is intended.
    """
    # Other uids previously used here:
    #   1039646267, 3079645245, 1043325954, 1806128454, 1002697421,
    #   3087118795, 3045056321, 3104811705, 2901331743, 1021, 3207638224
    uid = 1000000253

    msg_url = 'http://weibo.com/1000000253/ezC36cq3i6G'  # msg_id = '10036505028'

    store_path = './file/'

    # (progress line, crawler keyword arguments, crawler method to call)
    steps = (
        ('crawl weibos', {'uid': uid}, 'crawl_weibos'),
        ('crawl follows', {'uid': uid}, 'crawl_follows'),
        ('crawl fans', {'uid': uid}, 'crawl_fans'),
        ('crawl infos', {'uid': uid}, 'crawl_infos'),
        ('crawl reposts', {'msg_url': msg_url}, 'crawl_msg_reposts'),
        ('crawl comments', {'msg_url': msg_url}, 'crawl_msg_comments'),
    )

    fetcher = ComWeiboFetcher(username=user, password=pwd)

    t0 = time.time()

    if not fetcher.check_cookie():
        print('login failed.')
        sys.exit()

    for banner, target, method_name in steps:
        fetcher.n_connections = 0
        print(banner)
        crawler = ComWeiboCrawler(fetcher, store_path, **target)
        getattr(crawler, method_name)()

    elapsed = int(time.time() - t0)

    print('finished: # connections: %s, cost time: %s' % (
        fetcher.n_connections, elapsed))
def TestComWeiboCrawler(user, pwd):
    """Run the six ``ComWeiboCrawler`` crawls against fixed test targets.

    Args:
        user: account name passed to the fetcher as ``username``.
        pwd: password passed to the fetcher as ``password``.

    Prints 'login failed.' and calls ``sys.exit()`` when ``check_cookie()``
    is false. Each crawl resets the fetcher's ``n_connections`` to 0, builds
    a new crawler storing under './file/', and calls one crawl method. The
    last line printed shows ``n_connections`` and the elapsed whole seconds.

    NOTE(review): since the counter is reset before each crawl, the final
    connection count reflects only the comments crawl -- confirm intended.
    """
    # Other uids previously used here:
    #   1039646267, 3079645245, 1043325954, 1806128454, 1002697421,
    #   3087118795, 3045056321, 3104811705, 2901331743, 1021, 3207638224
    uid = 1000000253

    msg_url = 'http://weibo.com/1000000253/ezC36cq3i6G'  # msg_id = '10036505028'

    store_path = './file/'

    fetcher = ComWeiboFetcher(username=user, password=pwd)

    def run_step(banner, method_name, **target):
        # Reset the counter, announce the step, then build a crawler for
        # `target` and invoke the named crawl method on it.
        fetcher.n_connections = 0
        print(banner)
        step_crawler = ComWeiboCrawler(fetcher, store_path, **target)
        getattr(step_crawler, method_name)()

    began = time.time()

    logged_in = fetcher.check_cookie()
    if not logged_in:
        print('login failed.')
        sys.exit()

    run_step('crawl weibos', 'crawl_weibos', uid=uid)
    run_step('crawl follows', 'crawl_follows', uid=uid)
    run_step('crawl fans', 'crawl_fans', uid=uid)
    run_step('crawl infos', 'crawl_infos', uid=uid)
    run_step('crawl reposts', 'crawl_msg_reposts', msg_url=msg_url)
    run_step('crawl comments', 'crawl_msg_comments', msg_url=msg_url)

    took = int(time.time() - began)

    print('finished: # connections: %s, cost time: %s' % (fetcher.n_connections, took))