def init():
    """Configure logging, log in to Weibo, queue the tasks and run the controller.

    Logging is appended to ``weibo.log`` at DEBUG level.  The login is treated
    as successful only when ``weiboLogin.login`` returns ``1``; otherwise the
    process exits.  Every line of the ``task`` file is then put on
    ``GV.task_list`` and ``controller()`` is started.
    """
    # An earlier, shorter log format was:
    #   '[%(asctime)s]-%(levelname)s : %(message)s'
    logging.basicConfig(
        filename='weibo.log',
        filemode='a',
        format='[%(asctime)s] - %(module)s.%(funcName)s.%(lineno)d - %(levelname)s - %(message)s',
        level=logging.DEBUG)

    # Simulated login.  Per the original note, the account file holds the
    # user name on its first line and the password on its second line.
    filename = './config/account'
    WBLogin = weiboLogin.weiboLogin()
    if WBLogin.login(filename) == 1:
        print('Login success!')
    else:
        print('Login error!')
        exit()

    # Generate the tasks.  The file is read inside ``with`` so the handle is
    # closed even if queueing a line raises (the original never closed it).
    with open('task', 'r') as reader:
        for line in reader:
            # NOTE(review): lines keep their trailing newline, and on a
            # Python 2 byte string ``encode`` first decodes as ASCII --
            # confirm the task file only holds ASCII text.
            line = line.encode('utf-8')
            GV.task_list.put(line)

    # Start executing the queued tasks.
    controller()
def main(username='', userurl='/u/1765475181'):
    """Record the target user, make sure we are logged in, then crawl the page.

    ``username`` and ``userurl`` are stored on ``globalValue``.  When
    ``globalValue.isLogIn`` is false a login is attempted first; a failed
    login prints an error and exits the process.  After that a
    ``parserSaverThread`` is started and ``userurl`` is crawled.
    """
    globalValue.mainusername = username
    globalValue.mainuserurl = userurl

    if not globalValue.isLogIn:
        # Per the original note, the account file holds the user name on its
        # first line and the password on its second line.
        account_file = './conf/account'
        login_client = weiboLogin.weiboLogin()
        if login_client.login(account_file) != 1:
            print('Login error!')
            exit()
        globalValue.isLogIn = True
        print('Login success!')

    page_fetcher = getWeiboPage.getWeiboPage()
    # Per the original note: parserSaverThread processes the crawled pages,
    # extracts every @-mentioned user and stores them in userque2salve for
    # the slaves.  The example uid '/u/1765475181' is the value of the
    # <key value> pair sent by the master.
    saver = parserSaverThread()
    # Non-daemon: the worker is not terminated when the main thread ends.
    saver.setDaemon(False)
    saver.start()
    # Crawl the user's page.
    page_fetcher.get_userpage(userurl)
def syncWeibo(request):
    """Log in to Weibo, scrape the forum topic list and record unseen topics.

    For every ``<li class="">`` inside the ``top_topics`` list a title of the
    form ``[time][user]tagtitle`` is built.  Topics whose title is not yet
    stored as an ``Entry`` are saved and announced by mail.

    Returns an ``HttpResponse`` with "Login success!" after the scrape, or
    "Login error!" when the login does not return ``1``.
    """
    # NOTE(review): the Weibo and SMTP credentials are hard-coded in this
    # view; consider moving them to configuration.
    session = weiboLogin.weiboLogin()
    if session.login('*****@*****.**','1234qwer') != 1:
        return HttpResponse("Login error!")

    html = urllib2.urlopen('http://game.weibo.com/club/forum-315-1').read()
    document = BeautifulSoup(html)
    topic_list = document.find('ul', {'class': 'top_topics'})
    rows = topic_list.findAll('li', {'class': ''})

    for row in rows:
        # Lookups run in the same order as the original dict literal; the
        # second anchor of the 'no_rep_line' div supplies both link and title.
        author = row.find('div', {'class': 'col1'}).find('img')['alt']
        anchor = row.find('div', {'class': 'no_rep_line'}).findAll('a')[1]
        href = anchor['href']
        tag = row.find('a', {'class': 'navy_tag'}).string
        subject = anchor.string
        posted = row.find('div', {'class': 'col2'}).find('span').string
        item = {
            'user': author,
            'link': href,
            'tag': tag,
            'title': subject,
            'time': posted,
        }

        title = "[%s][%s]%s%s" % (item['time'], item['user'], item['tag'], item['title'])
        link = item['link']

        already_stored = Entry.objects.filter(title=title)
        if len(already_stored) != 0:
            continue

        new_entry = Entry(title=title, link=link, content='', feedid=0)
        new_entry.save()
        send_mail("*****@*****.**", title, link,
                  ("smtp.163.com", 25, "*****@*****.**", "1234qwer", False))

    return HttpResponse("Login success!")
def rsaLogin(request):
    """Log in to Weibo with the bundled account file and redirect on success.

    The account file is ``../files/account.txt`` relative to this module,
    with backslashes normalised to forward slashes.

    Returns an ``HttpResponseRedirect`` to ``/weiboindex/`` when the login
    returns ``1``, otherwise an ``HttpResponse`` with the body ``ERROR``.
    """
    filesPath = os.path.join(os.path.dirname(__file__), '..', 'files').replace('\\', '/')
    filePath = str(filesPath) + '/account.txt'

    WBlogin = weiboLogin.weiboLogin()
    resultList = WBlogin.login(filePath)
    if resultList == 1:
        print('login successful')
        return HttpResponseRedirect('/weiboindex/')

    # The original had a further ``return HttpResponse('SUCCESSFUL')`` after
    # the if/else; both branches return, so it could never run and was removed.
    print('login error!')
    return HttpResponse('ERROR')
def sinaRsaSpider(request):
    """Log in to Weibo with the bundled account file and redirect to the spider.

    The account file is ``../files/account.txt`` relative to this module,
    with backslashes normalised to forward slashes.

    Returns an ``HttpResponseRedirect`` to ``/spider/`` when the login
    returns ``1``, otherwise an ``HttpResponse`` with the body ``ERROR``.
    """
    filesPath = os.path.join(os.path.dirname(__file__), '..', 'files').replace('\\', '/')
    filePath = str(filesPath) + '/account.txt'

    WBlogin = weiboLogin.weiboLogin()
    resultList = WBlogin.login(filePath)
    if resultList == 1:
        print('login successful')
        return HttpResponseRedirect('/spider/')

    # The original had a further ``return HttpResponse('SUCCESSFUL')`` after
    # the if/else; both branches return, so it could never run and was removed.
    print('login error!')
    return HttpResponse('ERROR')
def _crawl_page_parts(category, page_no):
    """Fetch and store the three parts of one listing page; False when one is empty."""
    # (getter name, seconds to pause after storing that part)
    steps = (
        ('get_firstpage', 5),
        ('get_secondpage', 10),
        ('get_thirdpage', 30),
    )
    for getter_name, pause in steps:
        # A fresh getWeiboPage is built for every part, as in the original.
        fetcher = getWeiboPage.getWeiboPage(category, page_no)
        content = getattr(fetcher, getter_name)()
        if content == 0:
            return False
        # matcher.matcher(content).pageAnalyse() was the disabled alternative.
        matcher.matcher(content).insertContents()
        time.sleep(pause)
    return True


def main():
    """Log in to Weibo and crawl every page of the selected categories.

    Each page is fetched in three parts; a part that comes back as ``0`` ends
    the current category.  A failed login (``login`` not returning ``1``)
    prints an error and exits the process.
    """
    # Reference list of the known category ids.  It is not crawled directly;
    # copy entries into ``categorys`` below to select what to crawl.  (The
    # original reassigned ``categorys`` three more times; only the last
    # value ever took effect.)
    known_categorys = [
        '102803_ctg1_4188_-_ctg1_4188', '102803_ctg1_2088_-_ctg1_2088',
        '102803_ctg1_5988_-_ctg1_5988', '102803_ctg1_5088_-_ctg1_5088',
        '102803_ctg1_1288_-_ctg1_1288', '102803_ctg1_4288_-_ctg1_4288',
        '102803_ctg1_4688_-_ctg1_4688', '102803_ctg1_2488_-_ctg1_2488',
        '102803_ctg1_3288_-_ctg1_3288', '102803_ctg1_5288_-_ctg1_5288',
        '102803_ctg1_5188_-_ctg1_5188', '102803_ctg1_1388_-_ctg1_1388',
        '102803_ctg1_4788_-_ctg1_4788', '102803_ctg1_2188_-_ctg1_2188',
        '102803_ctg1_6088_-_ctg1_6088', '102803_ctg1_1199_-_ctg1_1199',
        '102803_ctg1_2288_-_ctg1_2288', '102803_ctg1_4988_-_ctg1_4988',
        '102803_ctg1_1988_-_ctg1_1988', '102803_ctg1_4388_-_ctg1_4388',
        '102803_ctg1_5788_-_ctg1_5788', '102803_ctg1_4888_-_ctg1_4888',
        '102803_ctg1_2588_-_ctg1_2588', '102803_ctg1_3188_-_ctg1_3188',
        '102803_ctg1_1488_-_ctg1_1488', '102803_ctg1_2688_-_ctg1_2688',
        '102803_ctg1_5588_-_ctg1_5588', '102803_ctg1_5888_-_ctg1_5888',
        '102803_ctg1_1688_-_ctg1_1688', '102803_ctg1_4588_-_ctg1_4588',
        '102803_ctg1_5388_-_ctg1_5388', '102803_ctg1_5488_-_ctg1_5488',
        '102803_ctg1_4488_-_ctg1_4488', '102803_ctg1_1588_-_ctg1_1588',
        '102803_ctg1_2388_-_ctg1_2388', '102803_ctg1_5688_-_ctg1_5688',
        '102803_ctg1_6399_-_ctg1_6399', '102803_ctg1_2788_-_ctg1_2788',
    ]
    # Categories actually crawled on this run -- edit as needed.
    categorys = ['102803_ctg1_5688_-_ctg1_5688']

    # NOTE(review): credentials are hard-coded; consider moving them to a
    # configuration file.
    username = '******'
    pwd = 'nan18756072542'
    WBLogin = weiboLogin.weiboLogin()
    if WBLogin.login(username, pwd) != 1:
        print('Login error!')
        exit()

    print('Login success!')
    for category in categorys:
        i = 1
        while True:
            # Both literals are unicode: joining a unicode string with a
            # non-ASCII byte string raises UnicodeDecodeError under the
            # default ASCII codec.
            print(u'正在获取第' + str(i) + u'页内容、、、')
            if not _crawl_page_parts(category, i):
                break
            i += 1
        # NOTE(review): the source was collapsed onto one line, so the nesting
        # of this pause is a reconstruction (once per category, after its
        # last page) -- confirm against the original file.
        time.sleep(60)
def main():
    """Prompt for a search phrase and crawl up to 50 Weibo result pages.

    After a successful login (``login`` returning ``1``) each result page is
    scanned line by line for the block that carries the search results.  The
    crawl stops when the "not found" marker appears, when the captcha marker
    shows the account is locked, when a page holds no recognisable block, or
    after page 50.  Nothing is done when the login fails.
    """
    urlheader = 'http://s.weibo.com/weibo/'
    para = raw_input('请输入搜索内容:\n')
    # Only spaces are escaped; replace() is a no-op when there are none, so
    # the original ``' ' in para`` test was redundant.
    keywords = para.replace(' ', '%20')
    print(keywords)
    page = 1

    # Markers matched against the raw page source, which carries the text as
    # literal \uXXXX escapes.
    # reg1: "Your behaviour is somewhat abnormal, please enter the captcha:"
    reg1 = re.compile(r'\\u4f60\\u7684\\u884c\\u4e3a\\u6709\\u4e9b\\u5f02\\u5e38\\uff0c\\u8bf7\\u8f93\\u5165\\u9a8c\\u8bc1\\u7801\\uff1a')
    # reg2: "Sorry, ... not found" (no search results)
    reg2 = re.compile(r'\\u62b1\\u6b49\\uff0c\\u672a\\u627e\\u5230')

    # NOTE(review): credentials are hard-coded; consider moving them to a
    # configuration file.  (The original assigned ``username`` twice in a row.)
    username = '******'
    pwd = 'nan18756072542'
    WBLogin = weiboLogin.weiboLogin()
    if WBLogin.login(username, pwd) != 1:
        return

    print('登录成功。。。')
    user = True  # the account is still usable
    while page <= 50 and user:
        url = urlheader + keywords + '&page=' + str(page)
        print('获取第%d页。。' % page)

        result_line = None
        f = urllib2.urlopen(url)
        try:
            for line in f:
                # The marker must match exactly: "pid":"pl_weibo_direct"
                if 'pid":"pl_weibo_direct"' in line:
                    result_line = line
                    break
                if '"pid":"pl_common_sassfilter"' in line and reg1.search(line):
                    print('此帐号被锁,使用下一个帐号')
                    user = False  # the account is no longer usable
        finally:
            # The original never closed the HTTP response.
            f.close()

        if result_line is not None:
            print(result_line)
            if reg2.search(result_line):
                print('抱歉,未找到结果。。。')
                return
            page += 1
            # matcher01.Matcher01(para, result_line).pageAnalyse() was the
            # disabled alternative.
            matcher01.Matcher01(para, result_line).insertContents()
            # Random pause of up to two minutes between result pages.
            stop = random.randint(0, 120)
            print(stop)
            time.sleep(stop)
        elif user:
            # Neither marker was found.  The original left ``page`` unchanged
            # here and re-requested the same URL forever without any delay.
            print('第%d页未找到可识别的内容,停止抓取。。。' % page)
            break
def init():
    """Configure logging, log in to Weibo, queue the tasks and run the controller.

    Logging is appended to ``weibo.log`` at DEBUG level.  A login that does
    not return ``1`` prints an error and exits the process.  Every line of
    the ``task`` file is then put on ``GV.task_list`` and ``controller()``
    is started.
    """
    # An earlier, shorter log format was:
    #   '[%(asctime)s]-%(levelname)s : %(message)s'
    logging.basicConfig(
        filename='weibo.log',
        filemode='a',
        format='[%(asctime)s] - %(module)s.%(funcName)s.%(lineno)d - %(levelname)s - %(message)s',
        level=logging.DEBUG)

    # Simulated login.  Per the original note, the account file holds the
    # user name on its first line and the password on its second line.
    filename = './config/account'
    WBLogin = weiboLogin.weiboLogin()
    if WBLogin.login(filename) != 1:
        print('Login error!')
        exit()
    print('Login success!')

    # Generate the tasks.  ``with`` closes the file even when queueing a line
    # raises; the original left the handle open.
    with open('task', 'r') as reader:
        for line in reader:
            # NOTE(review): lines keep their trailing newline, and on a
            # Python 2 byte string ``encode`` first decodes as ASCII --
            # confirm the task file only holds ASCII text.
            line = line.encode('utf-8')
            GV.task_list.put(line)

    # Start executing the queued tasks.
    controller()
def init():
    """Set up logging, log in, load the keyword dictionary and queue user tasks.

    After a successful login (``login`` returning ``1``; otherwise the process
    exits) the ``abbreviation`` table is loaded into ``GV.dict_klg`` keyed by
    ``kl``.  Every ``uid`` in ``userlist``, plus every id listed in its
    ``fui`` column, is then queued once on ``GV.task_list`` and
    ``controller()`` is started.
    """
    # An earlier, shorter log format was:
    #   '[%(asctime)s]-%(levelname)s : %(message)s'
    logging.basicConfig(
        filename='weibo.log',
        filemode='a',
        format='[%(asctime)s] - %(module)s.%(funcName)s.%(lineno)d - %(levelname)s - %(message)s',
        level=logging.DEBUG,
    )

    # Simulated login.  Per the original note, the account file holds the
    # user name on its first line and the password on its second line.
    account_file = './config/account'
    login_client = weiboLogin.weiboLogin()
    if login_client.login(account_file) == 1:
        print('Login success!')
    else:
        print('Login error!')
        exit()

    database = WeiboDB()

    # Build the knowledge-term dictionary: kl -> abrid.
    abbreviations = database.select("SELECT abrid, kl FROM `abbreviation`")
    for row in abbreviations:
        GV.dict_klg[row[1]] = row[0]

    # Collect the ids to crawl.  A dict is used as a set so each id is queued
    # only once.
    pending = {}
    users = database.select("SELECT uid, fui FROM `userlist`")
    for row in users:
        pending[row[0]] = 1
        # ``fui`` is split on "','" after stripping the outer quotes, i.e. it
        # is read as a quoted, comma-separated id list.
        for followed in row[1].strip("'").split("','"):
            pending[followed] = 1

    for uid in pending:
        GV.task_list.put(uid)

    # Start executing the queued tasks.
    controller()
def get_uid(filename, uid_list):
    """Append every line of *filename* to *uid_list*, stripped of whitespace.

    The list is extended in place and nothing is returned.  Blank lines are
    appended as empty strings.
    """
    # ``with open`` replaces the deprecated ``file()`` builtin and closes the
    # handle, which the original left open.
    with open(filename) as fread:
        uid_list.extend(line.strip() for line in fread)


def writefile(filename, content):
    """Append *content* to *filename*, creating the file when it is missing."""
    # ``with`` closes the file even when the write raises.
    with open(filename, 'a') as fw:
        fw.write(content)


if __name__ == '__main__':
    username = ''
    pwd = ''
    WBLogin = weiboLogin.weiboLogin()
    # Only the 'servertime_error' result is treated as a failed login here.
    if WBLogin.login(username, pwd) == 'servertime_error':
        print('login failed. check out your network.')
        sys.exit()

    uid_list = []
    get_uid('C:/Result1.txt', uid_list)

    path = 'C:/weibodata'
    if not os.path.exists(path):
        os.mkdir(path)

    for uid in uid_list:
        try:
            WBpage = getWeiboPage.getWeiboPage()
            WBpage.get_msg(uid)
        except Exception:
            # Best effort: record the uid that failed and carry on with the
            # remaining ones.
            writefile('C:/id.txt', str(uid) + '\n')
# -*- coding: utf-8 -*-
# Minimal usage example: log in to Weibo with the account set below.
# (The original repeated the coding declaration and carried a
# '#!/usr/bin/env python' line after the metadata, where it has no effect.)
__author__ = 'gancj'
# NOTE(review): '__data__' looks like a typo for '__date__'; the name is left
# unchanged in case other code reads it.
__data__ = '2015-06-19 17:11'
__mail__ = '[email protected]/[email protected]'

import urllib
import urllib2

import weiboLogin

# Replace the placeholders with a real account before running.
username = '******'
pwd = 'yourpwd'

WBLogin = weiboLogin.weiboLogin()
# NOTE(review): the login result is not checked here.
WBLogin.login(username, pwd)