예제 #1
0
    def get_response_content(self, url, headers=None, data=None):
        '''
        Request ``url`` with the cookies saved by an earlier simulated login.

        On every call the cookie jar is reloaded from ``self.cookiefile`` and a
        new opener (using ``self.proxy`` for http when it is non-empty) is
        built, stored on ``self.opener`` and installed globally.

        Args:
            url: address to request.
            headers: optional dict of request headers; ``None`` means none.
            data: optional request body, forwarded to ``self.pack_request``.

        Returns:
            ``''`` when the cookie file does not exist, or the HTTP status
            code when the request raises ``urllib2.HTTPError``.

        NOTE(review): on success the (gunzipped) body is stored in ``content``
        but no statement returning it is part of this excerpt -- confirm
        against the full method.
        '''
        # A ``{}`` default would be a single dict shared by every call.
        if headers is None:
            headers = {}

        content = ''
        try:
            # Key step: the cookies obtained by the simulated login must be
            # loaded; without the cookie file there is nothing to send.
            if not os.path.exists(self.cookiefile):
                return ''

            self.cj.revert(self.cookiefile, True, True)
            self.cookie_support = urllib2.HTTPCookieProcessor(self.cj)
            if self.proxy == '':
                self.opener = urllib2.build_opener(self.cookie_support,
                                                   urllib2.HTTPHandler)
            else:
                self.opener = urllib2.build_opener(
                    self.cookie_support,
                    urllib2.ProxyHandler({'http': self.proxy}))
            urllib2.install_opener(self.opener)

            req = self.pack_request(url=url, headers=headers, data=data)
            response = self.opener.open(req, timeout=10)
            try:
                is_gzip = response.info().get('Content-Encoding') == 'gzip'
                body = response.read()
            finally:
                # Release the connection even if reading fails.
                response.close()

            content = self.gzip_data(body) if is_gzip else body
        except urllib2.HTTPError as e:
            logError(e)
            return e.code
예제 #2
0
    def get_servertime(self):
        """
        Step one of the simulated login: fetch servertime, nonce and related
        values, which are used to encrypt the user name and password at login.

        On a successful parse, ``servertime``, ``nonce``, ``rsakv`` and
        ``pubkey`` are stored as strings in the local ``result`` dict and
        ``pcid`` is stored on ``self.pcid``.

        NOTE(review): the visible code never returns ``result``; presumably
        the return statement lies beyond this excerpt -- confirm.
        """
        # Prelogin URL; the value of __get_millitime() fills the "_" parameter.
        url = 'http://login.sina.com.cn/sso/prelogin.php?entry=account&callback=sinaSSOController.preloginCallBack&su=&rsakt=mod&client=ssologin.js(v1.4.2)&_=%s' % self.__get_millitime(
        )
        result = {}
        # NOTE(review): servertime and nonce are assigned here but not used in
        # the visible code.
        servertime = None
        nonce = None
        headers = self.__get_headers()
        headers['Host'] = 'login.sina.com.cn'
        headers['Accept'] = '*/*'
        headers['Referer'] = 'http://weibo.com/'
        # Raises KeyError if __get_headers() did not supply this key.
        del headers['Accept-encoding']
        # At most three attempts: a failed parse is logged and retried.
        for i in range(3):
            req = self.pack_request(url, headers)
            # Outside the try below, so errors raised by urlopen propagate.
            data = urllib2.urlopen(req).read()
            # Greedy match: everything between the first "(" and the last ")".
            p = re.compile('\((.*)\)')
            try:
                json_data = p.search(data).group(1)
                data = json.loads(json_data)
                result['servertime'] = str(data['servertime'])
                result['nonce'] = str(data['nonce'])
                result['rsakv'] = str(data['rsakv'])
                result['pubkey'] = str(data['pubkey'])
                self.pcid = str(data['pcid'])

                break
            except Exception, e:
                logError(e)
                msg = 'get severtime error!'
                logError(msg)

                continue
예제 #3
0
    def get_response_content(self, url, headers=None, data=None):
        '''
        Request ``url`` with the cookies saved by an earlier simulated login.

        On every call the cookie jar is reloaded from ``self.cookiefile`` and a
        new opener (using ``self.proxy`` for http when it is non-empty) is
        built, stored on ``self.opener`` and installed globally.

        Args:
            url: address to request.
            headers: optional dict of request headers; ``None`` means none.
            data: optional request body, forwarded to ``self.pack_request``.

        Returns:
            ``''`` when the cookie file does not exist, or the HTTP status
            code when the request raises ``urllib2.HTTPError``.

        NOTE(review): on success the (gunzipped) body is stored in ``content``
        but no statement returning it is part of this excerpt -- confirm
        against the full method.
        '''
        # A ``{}`` default would be a single dict shared by every call.
        if headers is None:
            headers = {}

        content = ''
        try:
            # Key step: the cookies obtained by the simulated login must be
            # loaded; without the cookie file there is nothing to send.
            if not os.path.exists(self.cookiefile):
                return ''

            self.cj.revert(self.cookiefile, True, True)
            self.cookie_support = urllib2.HTTPCookieProcessor(self.cj)
            if self.proxy == '':
                self.opener = urllib2.build_opener(self.cookie_support,
                                                   urllib2.HTTPHandler)
            else:
                self.opener = urllib2.build_opener(
                    self.cookie_support,
                    urllib2.ProxyHandler({'http': self.proxy}))
            urllib2.install_opener(self.opener)

            req = self.pack_request(url=url, headers=headers, data=data)
            response = self.opener.open(req, timeout=10)
            try:
                is_gzip = response.info().get('Content-Encoding') == 'gzip'
                body = response.read()
            finally:
                # Release the connection even if reading fails.
                response.close()

            content = self.gzip_data(body) if is_gzip else body
        except urllib2.HTTPError as e:
            logError(e)
            return e.code
예제 #4
0
    def redo_login(self, login_url):
        '''
        Step three of the login: request ``login_url`` and save the cookies.

        After the request succeeds the cookie jar ``self.cj`` is written to
        ``self.cookiefile`` and 'login success' is logged. Any exception is
        logged together with the line number it was raised on.

        NOTE(review): the local ``loginFalg`` records success or failure but is
        not returned within this excerpt; presumably a return follows -- confirm.
        '''
        try:
            headers = self.__get_headers()
            headers[
                'Referer'] = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.2)'
            req = self.pack_request(login_url, headers)
            # The response body is not read; the request is made for its cookies.
            urllib2.urlopen(req)

            # Save the cookies!
            self.cj.save(self.cookiefile, True, True)
            msg = 'login success'
            logInfo(msg)

            loginFalg = True
        except Exception, e:
            logError(e)
            # s[1] is the exception value, s[2] the traceback object.
            s = sys.exc_info()
            msg = ('redo_login %s happened on line %d' %
                   (s[1], s[2].tb_lineno))
            logError(msg)

            loginFalg = False
예제 #5
0
    def get_servertime(self):
        """
        Step one of the simulated login: fetch servertime, nonce and related
        values, which are used to encrypt the user name and password at login.

        On a successful parse, ``servertime``, ``nonce``, ``rsakv`` and
        ``pubkey`` are stored as strings in the local ``result`` dict and
        ``pcid`` is stored on ``self.pcid``.

        NOTE(review): the visible code never returns ``result``; presumably
        the return statement lies beyond this excerpt -- confirm.
        """
        # Prelogin URL; the value of __get_millitime() fills the "_" parameter.
        url = 'http://login.sina.com.cn/sso/prelogin.php?entry=account&callback=sinaSSOController.preloginCallBack&su=&rsakt=mod&client=ssologin.js(v1.4.2)&_=%s' % self.__get_millitime()
        result = {}
        # NOTE(review): servertime and nonce are assigned here but not used in
        # the visible code.
        servertime = None
        nonce = None
        headers = self.__get_headers()
        headers['Host'] = 'login.sina.com.cn'
        headers['Accept'] = '*/*'
        headers['Referer'] = 'http://weibo.com/'
        # Raises KeyError if __get_headers() did not supply this key.
        del headers['Accept-encoding']
        # At most three attempts: a failed parse is logged and retried.
        for i in range(3):
            req = self.pack_request(url, headers)
            # Outside the try below, so errors raised by urlopen propagate.
            data = urllib2.urlopen(req).read()
            # Greedy match: everything between the first "(" and the last ")".
            p = re.compile('\((.*)\)')
            try:
                json_data = p.search(data).group(1)
                data = json.loads(json_data)
                result['servertime'] = str(data['servertime'])
                result['nonce'] = str(data['nonce'])
                result['rsakv'] = str(data['rsakv'])
                result['pubkey'] = str(data['pubkey'])
                self.pcid = str(data['pcid'])

                break
            except Exception, e:
                logError(e)
                msg = 'get severtime error!'
                logError(msg)

                continue
예제 #6
0
    def redo_login(self, login_url):
        '''
        Step three of the login: request ``login_url`` and save the cookies.

        After the request succeeds the cookie jar ``self.cj`` is written to
        ``self.cookiefile`` and 'login success' is logged. Any exception is
        logged together with the line number it was raised on.

        NOTE(review): the local ``loginFalg`` records success or failure but is
        not returned within this excerpt; presumably a return follows -- confirm.
        '''
        try:
            headers = self.__get_headers()
            headers['Referer'] = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.2)'
            req = self.pack_request(login_url, headers)
            # The response body is not read; the request is made for its cookies.
            urllib2.urlopen(req)

            # Save the cookies!
            self.cj.save(self.cookiefile, True, True)
            msg = 'login success'
            logInfo(msg)

            loginFalg = True
        except Exception, e:
            logError(e)
            # s[1] is the exception value, s[2] the traceback object.
            s = sys.exc_info()
            msg = ('redo_login %s happened on line %d' % (s[1], s[2].tb_lineno))
            logError(msg)

            loginFalg = False
예제 #7
0
    def valid_cookie(self, html=''):
        '''
        Check whether the locally saved cookie file is still usable.

        Args:
            html: page content to inspect. When it is empty, a probe page is
                requested through ``get_response_content`` instead.

        Returns:
            ``False`` when no page content is available or when following the
            SSO redirect fails; in both cases the cookie file is deleted with
            ``clear_cookiedat`` first.

        NOTE(review): no value is returned on the remaining paths within this
        excerpt; the rest of the method is presumably not shown -- confirm.
        '''
        html = str(html)
        if not html:
            headers = self.__get_headers()
            # Probe Kai-Fu Lee's home page to find out whether the cookie
            # has expired.
            html = self.get_response_content(url='http://weibo.com/kaifulee',
                                             headers=headers)

        if not html:
            msg = 'need relogin.'
            logError(msg)

            self.clear_cookiedat(self.cookiefile)
            return False

        # get_response_content can hand back a non-string (an HTTP status
        # code), so normalise again before searching the text.
        html = str(html)
        html = html.replace('"', "'")
        if 'sinaSSOController' in html:
            # The page asks for an SSO redirect: follow it and store the
            # refreshed cookies.
            p = re.compile('location\.replace\(\'(.*?)\'\)')
            try:
                login_url = p.search(html).group(1)
                headers = self.__get_headers()
                headers['Host'] = 'account.weibo.com'
                req = self.pack_request(url=login_url, headers=headers)
                result = urllib2.urlopen(req)
                self.cj.save(self.cookiefile, True, True)

                if result.info().get('Content-Encoding') == 'gzip':
                    # Decompress with the same gzip_data helper the other
                    # request methods use.
                    html = self.gzip_data(result.read())
                else:
                    html = result.read()
            except Exception as e:
                logError(e)
                msg = 'relogin failure.'
                logError(msg)

                self.clear_cookiedat(self.cookiefile)
                return False
예제 #8
0
    def valid_cookie(self, html=''):
        '''
        Check whether the locally saved cookie file is still usable.

        Args:
            html: page content to inspect. When it is empty, a probe page is
                requested through ``get_response_content`` instead.

        Returns:
            ``False`` when no page content is available or when following the
            SSO redirect fails; in both cases the cookie file is deleted with
            ``clear_cookiedat`` first.

        NOTE(review): no value is returned on the remaining paths within this
        excerpt; the rest of the method is presumably not shown -- confirm.
        '''
        html = str(html)
        if not html:
            headers = self.__get_headers()
            # Probe Kai-Fu Lee's home page to find out whether the cookie
            # has expired.
            html = self.get_response_content(url='http://weibo.com/kaifulee',
                                             headers=headers)

        if not html:
            msg = 'need relogin.'
            logError(msg)

            self.clear_cookiedat(self.cookiefile)
            return False

        # get_response_content can hand back a non-string (an HTTP status
        # code), so normalise again before searching the text.
        html = str(html)
        html = html.replace('"', "'")
        if 'sinaSSOController' in html:
            # The page asks for an SSO redirect: follow it and store the
            # refreshed cookies.
            p = re.compile('location\.replace\(\'(.*?)\'\)')
            try:
                login_url = p.search(html).group(1)
                headers = self.__get_headers()
                headers['Host'] = 'account.weibo.com'
                req = self.pack_request(url=login_url, headers=headers)
                result = urllib2.urlopen(req)
                self.cj.save(self.cookiefile, True, True)

                if result.info().get('Content-Encoding') == 'gzip':
                    # Decompress with the same gzip_data helper the other
                    # request methods use.
                    html = self.gzip_data(result.read())
                else:
                    html = result.read()
            except Exception as e:
                logError(e)
                msg = 'relogin failure.'
                logError(msg)

                self.clear_cookiedat(self.cookiefile)
                return False
예제 #9
0
    def run(self):
        '''
        Fetch one page of search results and pass it to ``self.fetch``.

        The search URL is built from ``self.key``, ``self.startTime``,
        ``self.endTime`` and ``self.id`` (used as the page number), and is
        requested through ``self.sina.get_response_content``. Any exception is
        logged together with the line number it was raised on.

        NOTE(review): ``searchResult`` is set but not used within this
        excerpt -- confirm how the rest of the method consumes it.
        '''
        # Used when printing to a file
        # reload(sys)
        # sys.setdefaultencoding('utf-8')

        try:
            searchResult = ''
            url = 'http://s.weibo.com/weibo/%s&xsort=time&scope=ori&timescope=custom:%s:%s&page=%d' \
                % (self.key, self.startTime, self.endTime, self.id)
            headers = {
                'Host': 's.weibo.com',
                'User-Agent':
                'Mozilla/5.0 (Windows NT 6.1; rv:13.0) Gecko/20100101 Firefox/13.0.1',
                'Accept':
                'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-Language': 'zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
                'Accept-Encoding': 'gzip, deflate',
                'Connection': 'keep-alive',
                'Referer': 'http://s.weibo.com',
            }
            content = self.sina.get_response_content(url, headers)
            if content == '':
                msg = u'%s failure:获取网页内容为空!' % self.id
                logError(msg)
                searchResult = 'error'
            # Handling of other error types
            # Follow-up processing: extract the weibo posts

            # result = open('result.html', 'w')
            # print >> result, content
            # NOTE(review): fetch() is still called when content is empty --
            # confirm that this is intended.
            self.fetch(content)

        except Exception, e:
            logError(e)
            searchResult = 'error'
            # s[1] is the exception value, s[2] the traceback object.
            s = sys.exc_info()
            msg = ('SearchWeiboThread run Error %s happened on line %d' %
                   (s[1], s[2].tb_lineno))
            logError(msg)
예제 #10
0
 def clear_cookiedat(self, datpath):
     '''
     Delete the cookie file at ``datpath`` on a best-effort basis.

     A failure (for example when the file does not exist) is handed to
     ``logError`` instead of being raised to the caller.
     '''
     try:
         os.remove(datpath)
     except Exception as e:  # the ``as`` form parses on Python 2.6+ and 3.x
         logError(e)
예제 #11
0
    def do_login(self, login_un, login_pw, door = ''):
        '''
        Step two of the login: POST the encrypted credentials.

        Args:
            login_un: user name; encoded with ``self.get_user`` into ``su``.
            login_pw: password; encoded with ``self.get_pwd`` (together with
                ``self.servertime`` and ``self.nonce``) into ``sp``.
            door: when non-empty, sent lower-cased as ``door`` along with
                ``self.pcid`` (presumably the captcha text -- confirm).

        Returns:
            The response body, passed through ``self.gzip_data`` when the
            response is gzip-encoded.

        NOTE(review): when an exception is raised it is logged and
        ``loginFlag`` is set, but nothing is returned within this excerpt --
        confirm against the full method.
        '''
        loginFlag = False
        try:
            username = login_un
            pwd = login_pw

            url = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.2)'
            # Building the POST body correctly is the key part!
            # NOTE(review): the 'ssosimplelogin' value '******' looks masked --
            # confirm the intended literal.
            postdata = {
                    'entry': 'weibo',
                    'gateway': '1',
                    'from': '',
                    'savestate': '7',
                    'userticket': '1',
                    'pagerefer' : '',
                    'ssosimplelogin': '******',
                    'vsnf': '1',
                    'vsnval': '',
                    'service': 'miniblog',
                    'pwencode': 'rsa2',
                    'rsakv' : self.rsakv,
                    'encoding': 'UTF-8',
                    'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
                    'returntype': 'META',
                    'prelt' : '26',
                    }
            postdata['servertime'] = self.servertime
            postdata['nonce'] = self.nonce
            postdata['su'] = self.get_user(username)
            postdata['sp'] = self.get_pwd(pwd, self.servertime, self.nonce).lower()
            # For logins that require a captcha; to be improved later
            if door:
                postdata['pcid'] = self.pcid
                postdata['door'] = door.lower()

            headers = {
                    'Host': 'login.sina.com.cn',
                    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:17.0) Gecko/20100101 Firefox/17.0',
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                    'Accept-encoding': 'gzip, deflate',
                    'Accept-Language': 'zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
                    'Connection': 'keep-alive',
                    'Referer'  :  'http://weibo.com/',
                    'Content-Type': 'application/x-www-form-urlencoded',
                    }

            req = self.pack_request(url, headers, postdata)
            result = urllib2.urlopen(req)

            if result.info().get('Content-Encoding') == 'gzip':
                text = self.gzip_data(result.read())
            else:
                text = result.read()

            return text
        except Exception, e:
            logError(e)
            # s[1] is the exception value, s[2] the traceback object.
            s = sys.exc_info()
            msg = ('do_login: %s happened on line %d' % (s[1], s[2].tb_lineno))
            logError(msg)

            loginFlag = False
예제 #12
0
    def login(self, login_un, login_pw):
        '''
        对外暴露的登录接口
        '''
        loginFalg = False
        try:
            try:
                # 步骤一,获取加密用servertime、nonce等信息
                stObj = self.get_servertime()
                self.servertime = stObj.get('servertime')
                self.nonce = stObj.get('nonce')
                self.pubkey = stObj.get('pubkey')
                self.rsakv = stObj.get('rsakv')
            except Exception, e:
                logError(e)
                return False

            self.get_global_id()
            # 步骤二,加密密码登录
            loginHtml = self.do_login(login_un, login_pw)
            loginHtml = loginHtml.replace('"', "'")
            try:
                p = re.compile('location\.replace\(\'(.*?)\'\)')
                login_url = p.search(loginHtml).group(1)
                if 'retcode=0' in loginHtml:
                    # 步骤三,根据步骤二跳转地址,进一步登录,获取cookie信息
                    # 这一步成功才是真的成功!!
                    return self.redo_login(login_url)

                if 'retcode=5' in loginHtml:
                    logError('password or account error.')
                    return False

                if 'retcode=4040' in loginHtml:
                    logError('do login too much times.')
                    return False

                # 需要验证码,悲剧,先报错吧!后续优化~
                if 'retcode=4049' in login_url:
                    logError('nead input verify code, return failure.')
                    return False
            except  Exception, e:
                logError(e)
                s = sys.exc_info()
                msg = ('do login %s happened on line %d' % (s[1], s[2].tb_lineno))
                logError(msg)

                loginFalg = False
예제 #13
0
    def login(self, login_un, login_pw):
        '''
        对外暴露的登录接口
        '''
        loginFalg = False
        try:
            try:
                # 步骤一,获取加密用servertime、nonce等信息
                stObj = self.get_servertime()
                self.servertime = stObj.get('servertime')
                self.nonce = stObj.get('nonce')
                self.pubkey = stObj.get('pubkey')
                self.rsakv = stObj.get('rsakv')
            except Exception, e:
                logError(e)
                return False

            self.get_global_id()
            # 步骤二,加密密码登录
            loginHtml = self.do_login(login_un, login_pw)
            loginHtml = loginHtml.replace('"', "'")
            try:
                p = re.compile('location\.replace\(\'(.*?)\'\)')
                login_url = p.search(loginHtml).group(1)
                if 'retcode=0' in loginHtml:
                    # 步骤三,根据步骤二跳转地址,进一步登录,获取cookie信息
                    # 这一步成功才是真的成功!!
                    return self.redo_login(login_url)

                if 'retcode=5' in loginHtml:
                    logError('password or account error.')
                    return False

                if 'retcode=4040' in loginHtml:
                    logError('do login too much times.')
                    return False

                # 需要验证码,悲剧,先报错吧!后续优化~
                if 'retcode=4049' in login_url:
                    logError('nead input verify code, return failure.')
                    return False
            except Exception, e:
                logError(e)
                s = sys.exc_info()
                msg = ('do login %s happened on line %d' %
                       (s[1], s[2].tb_lineno))
                logError(msg)

                loginFalg = False
예제 #14
0
 def clear_cookiedat(self, datpath):
     '''
     Delete the cookie file at ``datpath`` on a best-effort basis.

     A failure (for example when the file does not exist) is handed to
     ``logError`` instead of being raised to the caller.
     '''
     try:
         os.remove(datpath)
     except Exception as e:  # the ``as`` form parses on Python 2.6+ and 3.x
         logError(e)
예제 #15
0
    def do_login(self, login_un, login_pw, door=''):
        '''
        Step two of the login: POST the encrypted credentials.

        Args:
            login_un: user name; encoded with ``self.get_user`` into ``su``.
            login_pw: password; encoded with ``self.get_pwd`` (together with
                ``self.servertime`` and ``self.nonce``) into ``sp``.
            door: when non-empty, sent lower-cased as ``door`` along with
                ``self.pcid`` (presumably the captcha text -- confirm).

        Returns:
            The response body, passed through ``self.gzip_data`` when the
            response is gzip-encoded.

        NOTE(review): when an exception is raised it is logged and
        ``loginFlag`` is set, but nothing is returned within this excerpt --
        confirm against the full method.
        '''
        loginFlag = False
        try:
            username = login_un
            pwd = login_pw

            url = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.2)'
            # Building the POST body correctly is the key part!
            # NOTE(review): the 'ssosimplelogin' value '******' looks masked --
            # confirm the intended literal.
            postdata = {
                'entry': 'weibo',
                'gateway': '1',
                'from': '',
                'savestate': '7',
                'userticket': '1',
                'pagerefer': '',
                'ssosimplelogin': '******',
                'vsnf': '1',
                'vsnval': '',
                'service': 'miniblog',
                'pwencode': 'rsa2',
                'rsakv': self.rsakv,
                'encoding': 'UTF-8',
                'url':
                'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
                'returntype': 'META',
                'prelt': '26',
            }
            postdata['servertime'] = self.servertime
            postdata['nonce'] = self.nonce
            postdata['su'] = self.get_user(username)
            postdata['sp'] = self.get_pwd(pwd, self.servertime,
                                          self.nonce).lower()
            # For logins that require a captcha; to be improved later
            if door:
                postdata['pcid'] = self.pcid
                postdata['door'] = door.lower()

            headers = {
                'Host': 'login.sina.com.cn',
                'User-Agent':
                'Mozilla/5.0 (Windows NT 6.1; rv:17.0) Gecko/20100101 Firefox/17.0',
                'Accept':
                'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                'Accept-encoding': 'gzip, deflate',
                'Accept-Language': 'zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
                'Connection': 'keep-alive',
                'Referer': 'http://weibo.com/',
                'Content-Type': 'application/x-www-form-urlencoded',
            }

            req = self.pack_request(url, headers, postdata)
            result = urllib2.urlopen(req)

            if result.info().get('Content-Encoding') == 'gzip':
                text = self.gzip_data(result.read())
            else:
                text = result.read()

            return text
        except Exception, e:
            logError(e)
            # s[1] is the exception value, s[2] the traceback object.
            s = sys.exc_info()
            msg = ('do_login: %s happened on line %d' % (s[1], s[2].tb_lineno))
            logError(msg)

            loginFlag = False
예제 #16
0
                    logError('do login too much times.')
                    return False

                # 需要验证码,悲剧,先报错吧!后续优化~
                if 'retcode=4049' in login_url:
                    logError('nead input verify code, return failure.')
                    return False
            except  Exception, e:
                logError(e)
                s = sys.exc_info()
                msg = ('do login %s happened on line %d' % (s[1], s[2].tb_lineno))
                logError(msg)

                loginFalg = False
        except Exception, e:
            logError(e)
            s = sys.exc_info()
            msg = ('login: %s happened on line %d' % (s[1], s[2].tb_lineno))
            logError(msg)

            loginFalg = False

        return loginFalg


    def do_login(self, login_un, login_pw, door = ''):
        '''
        第二步登录
        '''
        loginFlag = False
        try:
예제 #17
0
                    return False

                # 需要验证码,悲剧,先报错吧!后续优化~
                if 'retcode=4049' in login_url:
                    logError('nead input verify code, return failure.')
                    return False
            except Exception, e:
                logError(e)
                s = sys.exc_info()
                msg = ('do login %s happened on line %d' %
                       (s[1], s[2].tb_lineno))
                logError(msg)

                loginFalg = False
        except Exception, e:
            logError(e)
            s = sys.exc_info()
            msg = ('login: %s happened on line %d' % (s[1], s[2].tb_lineno))
            logError(msg)

            loginFalg = False

        return loginFalg

    def do_login(self, login_un, login_pw, door=''):
        '''
        第二步登录
        '''
        loginFlag = False
        try:
            username = login_un