def refreshlogin(self, htmconf):
    """Log in again when the saved session is unusable, else parse the page.

    The cookie file at ``self.filename`` is loaded if it exists. When the jar
    holds no ``gsid_CTandWM`` cookie, or ``htmconf`` contains the "invalid
    user identity" notice, the cookie file is removed, ``self.fastlogin()``
    is called, and the login link found in its result is handed to
    ``WeiboLogin``. Otherwise ``htmconf`` is passed to
    ``WeiboCatch.findweibo``.

    Args:
        htmconf: Text of the page that was just fetched. It is searched with
            a unicode needle, so it is presumably already decoded -- confirm
            against the caller.

    Raises:
        RuntimeError: If the ``fastlogin()`` result contains no login link.
    """
    reg = u"<a href=('http://login.weibo.cn/[^\u4E00-\u9FA5]*?)[>]+?[\u4E00-\u9FA5]{2}</a>"
    pattern = re.compile(reg)
    strtemp = '用户身份无效,请稍候重新登录'

    ckjar = cookielib.MozillaCookieJar()
    # Read the previously saved cookies, if any.
    if os.path.exists(self.filename):
        ckjar.load(self.filename, ignore_discard=True, ignore_expires=True)

    # Session cookie present and the page does not report an invalid identity:
    # nothing to refresh, go straight to parsing.
    if "gsid_CTandWM" in str(ckjar) and htmconf.find(strtemp.decode('utf8')) == -1:
        WeiboCatch.findweibo(htmconf)
        return

    # Drop the stale cookie file before logging in again.
    if os.path.exists(self.filename):
        os.remove(self.filename)

    htm = self.fastlogin()
    match = pattern.search(htm)
    if match is None:
        # Previously this surfaced as an opaque AttributeError on None.
        raise RuntimeError("login link not found in fastlogin() response")

    # The captured group still carries the single quotes that surround the
    # href value; slice from the second to the second-to-last character.
    loginweb = match.group(1)[1:-1]
    print(loginweb)
    for param in loginweb.split(';'):
        print(param)

    wl = WeiboLogin(loginweb, "", "")
    wl.getweibologin()
def refreshlogin(self, htmconf):
    """Log in again when the saved session is unusable, else parse the page.

    The cookie file at ``self.filename`` is loaded if it exists. When the jar
    holds no ``gsid_CTandWM`` cookie, or ``htmconf`` contains the "invalid
    user identity" notice, the cookie file is removed, ``self.fastlogin()``
    is called, and the login link found in its result is handed to
    ``WeiboLogin``. Otherwise ``htmconf`` is passed to
    ``WeiboCatch.findweibo``.

    Args:
        htmconf: Text of the page that was just fetched. It is searched with
            a unicode needle, so it is presumably already decoded -- confirm
            against the caller.

    Raises:
        RuntimeError: If the ``fastlogin()`` result contains no login link.
    """
    reg = u"<a href=('http://login.weibo.cn/[^\u4E00-\u9FA5]*?)[>]+?[\u4E00-\u9FA5]{2}</a>"
    pattern = re.compile(reg)
    strtemp = '用户身份无效,请稍候重新登录'

    ckjar = cookielib.MozillaCookieJar()
    # Read the previously saved cookies, if any.
    if os.path.exists(self.filename):
        ckjar.load(self.filename, ignore_discard=True, ignore_expires=True)

    # Session cookie present and the page does not report an invalid identity:
    # nothing to refresh, go straight to parsing.
    if "gsid_CTandWM" in str(ckjar) and htmconf.find(strtemp.decode('utf8')) == -1:
        WeiboCatch.findweibo(htmconf)
        return

    # Drop the stale cookie file before logging in again.
    if os.path.exists(self.filename):
        os.remove(self.filename)

    htm = self.fastlogin()
    match = pattern.search(htm)
    if match is None:
        # Previously this surfaced as an opaque AttributeError on None.
        raise RuntimeError("login link not found in fastlogin() response")

    # The captured group still carries the single quotes that surround the
    # href value; slice from the second to the second-to-last character.
    loginweb = match.group(1)[1:-1]
    print(loginweb)
    for param in loginweb.split(';'):
        print(param)

    wl = WeiboLogin(loginweb, "", "")
    wl.getweibologin()
def loginweb1(self, sweb):
    """Submit the weibo.cn login form and pass the resulting page on.

    Credentials are read from the ``[weibo]`` section of the ``localconfig``
    file (options ``weibouser`` and ``weibopwd``). The ``vk`` token is taken
    from the login page ``sweb``, the form is POSTed, the received cookies
    are saved to ``self.filename``, ``vt=<digits>`` query parameters are
    stripped from the response, and the result is handed to
    ``WeiboCatch.findweibo``.

    Args:
        sweb: HTML of the login page; it must contain an ``<input>`` named
            ``vk`` with a ``value`` attribute.

    Note:
        If the configuration cannot be read the process is terminated via
        ``os._exit(1)`` after printing ``no config``.
    """
    try:
        config = ConfigParser.ConfigParser()
        with open('localconfig', 'r') as cfgfile:
            config.readfp(cfgfile)
        USERNAME = config.get('weibo', 'weibouser')
        PASSWD = config.get('weibo', 'weibopwd')
    except (IOError, ConfigParser.Error):
        print("no config")
        # os._exit() requires a status argument; calling it with none raised
        # TypeError instead of terminating the process.
        os._exit(1)

    soup = BeautifulSoup(sweb)
    # find() yields the first matching tag (findAll would yield a list).
    resp1 = soup.find('input', attrs={'name': 'vk'})['value']
    # The password field name is "password_" plus the part of vk before the
    # first underscore.
    flag = resp1.split('_')[0]
    passwordflag = "password_" + flag
    print(passwordflag)

    ckjar = cookielib.MozillaCookieJar(self.filename)
    rand = random.randint(500000000, 999999999)
    surl = ("http://login.weibo.cn/login/?rand=" + str(rand) +
            "&backURL=http%3A%2F%2Fweibo.cn%2F&backTitle=%E5%BE%AE%E5%8D%9A&vt=4&revalid=2&ns=1")
    print(surl)

    req = urllib2.Request(surl)
    req.add_header(
        'Accept',
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8')
    # Accept-Encoding is deliberately not sent so that the server does not
    # return a compressed page.
    req.add_header('Accept-Language', 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3')
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0')
    req.add_header(
        'Referer',
        'http://login.weibo.cn/login/?ns=1&revalid=2&backURL=http%3A%2F%2Fweibo.cn%2F&backTitle=%CE%A2%B2%A9&vt=')

    postdata = urllib.urlencode({
        'mobile': USERNAME,  # account
        passwordflag: PASSWD,  # password
        'remember': 'on',
        'backURL': 'http%3A%2F%2Fweibo.cn%2F',
        'backTitle': '微博',
        'tryCount': '',
        'vk': resp1,
        'submit': '登录'
    })
    req.add_data(postdata)

    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(ckjar))
    f = opener.open(req)
    try:
        htm = f.read().decode('utf-8')
    finally:
        # Close the response even when reading or decoding fails.
        f.close()

    # NOTE(review): the page and the session cookie values go to stdout.
    print(htm)
    for item in ckjar:
        print("name:" + item.name)
        print("Value:" + item.value)
    ckjar.save(self.filename, ignore_discard=True, ignore_expires=True)

    # WeiboCatch.findweibo is invoked on the class, without an instance. Per
    # the original author's note it is declared as a @classmethod (first
    # parameter ``cls``); otherwise this call would only fail at run time.
    htm = re.sub(r"vt=\d+&", '', htm)
    htm = re.sub(r"\?vt=\d+", '', htm)
    WeiboCatch.findweibo(htm)
def loginweb1(self, sweb):
    """Submit the weibo.cn login form and pass the resulting page on.

    Credentials are read from the ``[weibo]`` section of the ``localconfig``
    file (options ``weibouser`` and ``weibopwd``). The ``vk`` token is taken
    from the login page ``sweb``, the form is POSTed, the received cookies
    are saved to ``self.filename``, ``vt=<digits>`` query parameters are
    stripped from the response, and the result is handed to
    ``WeiboCatch.findweibo``.

    Args:
        sweb: HTML of the login page; it must contain an ``<input>`` named
            ``vk`` with a ``value`` attribute.

    Note:
        If the configuration cannot be read the process is terminated via
        ``os._exit(1)`` after printing ``no config``.
    """
    try:
        config = ConfigParser.ConfigParser()
        with open("localconfig", "r") as cfgfile:
            config.readfp(cfgfile)
        USERNAME = config.get("weibo", "weibouser")
        PASSWD = config.get("weibo", "weibopwd")
    except (IOError, ConfigParser.Error):
        print("no config")
        # os._exit() requires a status argument; calling it with none raised
        # TypeError instead of terminating the process.
        os._exit(1)

    soup = BeautifulSoup(sweb)
    # find() yields the first matching tag (findAll would yield a list).
    resp1 = soup.find("input", attrs={"name": "vk"})["value"]
    # The password field name is "password_" plus the part of vk before the
    # first underscore.
    flag = resp1.split("_")[0]
    passwordflag = "password_" + flag
    print(passwordflag)

    ckjar = cookielib.MozillaCookieJar(self.filename)
    rand = random.randint(500000000, 999999999)
    surl = (
        "http://login.weibo.cn/login/?rand="
        + str(rand)
        + "&backURL=http%3A%2F%2Fweibo.cn%2F&backTitle=%E5%BE%AE%E5%8D%9A&vt=4&revalid=2&ns=1"
    )
    print(surl)

    req = urllib2.Request(surl)
    req.add_header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
    # Accept-Encoding is deliberately not sent so that the server does not
    # return a compressed page.
    req.add_header("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3")
    req.add_header("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0")
    req.add_header(
        "Referer",
        "http://login.weibo.cn/login/?ns=1&revalid=2&backURL=http%3A%2F%2Fweibo.cn%2F&backTitle=%CE%A2%B2%A9&vt=",
    )

    postdata = urllib.urlencode(
        {
            "mobile": USERNAME,  # account
            passwordflag: PASSWD,  # password
            "remember": "on",
            "backURL": "http%3A%2F%2Fweibo.cn%2F",
            "backTitle": "微博",
            "tryCount": "",
            "vk": resp1,
            "submit": "登录",
        }
    )
    req.add_data(postdata)

    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(ckjar))
    f = opener.open(req)
    try:
        htm = f.read().decode("utf-8")
    finally:
        # Close the response even when reading or decoding fails.
        f.close()

    # NOTE(review): the page and the session cookie values go to stdout.
    print(htm)
    for item in ckjar:
        print("name:" + item.name)
        print("Value:" + item.value)
    ckjar.save(self.filename, ignore_discard=True, ignore_expires=True)

    # WeiboCatch.findweibo is invoked on the class, without an instance. Per
    # the original author's note it is declared as a @classmethod (first
    # parameter ``cls``); otherwise this call would only fail at run time.
    htm = re.sub(r"vt=\d+&", "", htm)
    htm = re.sub(r"\?vt=\d+", "", htm)
    WeiboCatch.findweibo(htm)