Esempio n. 1
0
    def redo_login(self, login_url):
        '''
        Third login step: request ``login_url`` and persist the cookies.

        The request is sent with the default headers plus a fixed Referer.
        When the request and the cookie save both succeed, the cookie jar is
        written to ``self.cookiefile``.

        :param login_url: URL requested to complete the login.
        :return: True when no exception was raised, False otherwise (the
            error is logged, never propagated).
        '''
        try:
            headers = self.__get_headers()
            headers['Referer'] = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.2)'
            req = self.pack_request(login_url, headers)
            urllib2.urlopen(req)

            # Save the cookies. NOTE(review): the two positional True flags
            # are presumably ignore_discard / ignore_expires of a cookielib
            # FileCookieJar -- confirm against where self.cj is created.
            self.cj.save(self.cookiefile, True, True)
            logInfo('login success')

            # Report the outcome to the caller; previously the flag was
            # assigned but never returned, so the result was always None.
            return True
        except Exception as e:
            # Best-effort step: log the error and the line it happened on
            # instead of letting it propagate.
            logError(e)
            s = sys.exc_info()
            msg = ('redo_login %s happened on line %d' %
                   (s[1], s[2].tb_lineno))
            logError(msg)

            return False
Esempio n. 2
0
def run():
    '''
    Log in to Sina Weibo and, on success, start one search thread.

    Credentials and settings are read from ``syscontext`` with built-in
    fallback values. Nothing is returned; the outcome is only logged.
    '''
    # Simulated login: gather credentials and settings.
    account = syscontext.user.get('un', 'wwang1969@126')
    secret = syscontext.user.get('pw', 'w196988')
    work_dir = syscontext.config.get('temp', './temp')
    proxy_url = syscontext.config.get('httpproxy',
                                      'http://web-proxy.oa.com:8080')

    # Company network: requests must go through the proxy.
    sina = LoginSinaWeibo(soft_path=work_dir, proxy=proxy_url)

    if not sina.check_cookie(account, secret, work_dir):
        logInfo('sina weibo login failure, check username/password!')
        return

    logInfo('sina weibo login sucess!')

    # Range strings are today's date followed by '0' and '23'.
    day_prefix = time.strftime("%Y-%m-%d-", time.localtime())
    worker = SearchWeiboThread(1, day_prefix + '0', day_prefix + '23', sina)
    worker.start()
Esempio n. 3
0
    def saveToDB(self):
        '''
        Write the collected weibo entries to the database.

        Connects ``weiboDB``, makes sure the ``Weibo`` table exists, then
        inserts one row per entry in ``self.weibolist`` (an empty or None
        list inserts nothing). The connection is closed again even when an
        insert raises, so repeated calls do not pile up open connections.
        '''
        weiboDB.connect()
        try:
            weiboDB.create_tables([Weibo], safe=True)
            for entry in self.weibolist or ():
                Weibo.create(mid=entry.mid,
                             name=entry.name,
                             userurl=entry.userurl,
                             content=entry.content,
                             weibourl=entry.weibourl)
        finally:
            # NOTE(review): assumes weiboDB is a peewee Database, which
            # provides close() -- confirm against its definition.
            weiboDB.close()

        logInfo('save over~')
Esempio n. 4
0
    def analyze(self):
        '''
        Segment the text of every collected weibo and log the tokens.

        Each entry in ``self.weibolist`` is cut with ``jieba.cut`` and the
        tokens are logged joined by '|'. Entries whose content contains the
        marker string tested below are skipped. Only segmentation happens
        here; nothing is returned.
        '''
        if not self.weibolist:
            return

        for entry in self.weibolist:
            if u'手机QQ浏览器' in entry.content:
                continue
            tokens = jieba.cut(entry.content)
            logInfo('|'.join(tokens))
    def redo_login(self, login_url):
        '''
        Third login step: request ``login_url`` and persist the cookies.

        The request is sent with the default headers plus a fixed Referer.
        When the request and the cookie save both succeed, the cookie jar is
        written to ``self.cookiefile``.

        :param login_url: URL requested to complete the login.
        :return: True when no exception was raised, False otherwise (the
            error is logged, never propagated).
        '''
        try:
            headers = self.__get_headers()
            headers['Referer'] = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.2)'
            req = self.pack_request(login_url, headers)
            urllib2.urlopen(req)

            # Save the cookies. NOTE(review): the two positional True flags
            # are presumably ignore_discard / ignore_expires of a cookielib
            # FileCookieJar -- confirm against where self.cj is created.
            self.cj.save(self.cookiefile, True, True)
            logInfo('login success')

            # Report the outcome to the caller; previously the flag was
            # assigned but never returned, so the result was always None.
            return True
        except Exception as e:
            # Best-effort step: log the error and the line it happened on
            # instead of letting it propagate.
            logError(e)
            s = sys.exc_info()
            msg = ('redo_login %s happened on line %d' % (s[1], s[2].tb_lineno))
            logError(msg)

            return False
    def check_cookie(self, un, pw, softPath):
        '''
        Check the local cookie file and log in again when it is unusable.

        Sets ``self.cookiefile`` to ``<softPath>/cookie.dat``. When that file
        is missing, or does not contain the text 'Set-Cookie',
        ``self.login(un, pw)`` is called.

        :param un: user name passed to ``self.login``.
        :param pw: password passed to ``self.login``.
        :param softPath: directory that holds ``cookie.dat``.
        :return: the result of ``self.valid_cookie()`` when the cookie file
            looked usable or the login result was truthy, otherwise False.
        '''
        self.cookiefile = os.path.join(softPath, 'cookie.dat')

        if os.path.exists(self.cookiefile):
            logInfo('cookie dat exist.')
            # Read through a context manager so the handle is closed
            # promptly instead of being left to the garbage collector.
            with open(self.cookiefile, 'r') as cookie_fp:
                has_cookie = 'Set-Cookie' in cookie_fp.read()

            if has_cookie:
                logged_in = True
            else:
                logInfo('but does not contain a valid cookie.')
                logged_in = self.login(un, pw)
        else:
            logInfo('cookie dat not exist.')
            logged_in = self.login(un, pw)

        if logged_in:
            return self.valid_cookie()
        return False
Esempio n. 7
0
    def check_cookie(self, un, pw, softPath):
        '''
        Check the local cookie file and log in again when it is unusable.

        Sets ``self.cookiefile`` to ``<softPath>/cookie.dat``. When that file
        is missing, or does not contain the text 'Set-Cookie',
        ``self.login(un, pw)`` is called.

        :param un: user name passed to ``self.login``.
        :param pw: password passed to ``self.login``.
        :param softPath: directory that holds ``cookie.dat``.
        :return: the result of ``self.valid_cookie()`` when the cookie file
            looked usable or the login result was truthy, otherwise False.
        '''
        self.cookiefile = os.path.join(softPath, 'cookie.dat')

        if os.path.exists(self.cookiefile):
            logInfo('cookie dat exist.')
            # Read through a context manager so the handle is closed
            # promptly instead of being left to the garbage collector.
            with open(self.cookiefile, 'r') as cookie_fp:
                has_cookie = 'Set-Cookie' in cookie_fp.read()

            if has_cookie:
                logged_in = True
            else:
                logInfo('but does not contain a valid cookie.')
                logged_in = self.login(un, pw)
        else:
            logInfo('cookie dat not exist.')
            logged_in = self.login(un, pw)

        if logged_in:
            return self.valid_cookie()
        return False
Esempio n. 8
0
    def fetch(self, content):
        '''
        Parse a search-result page and collect the weibo entries it contains.

        1. Extract the JSON payloads, matching <script>STK && STK\.pageletM && STK\.pageletM\.view\((.*)\).*?</script> ;
        2. Extract the weibo list, matching <div mid=\\"\d*\\" action-type=\\"feed_list_item\\"> ;

        Every parsed entry is appended to self.weibolist, and the list is
        saved through self.saveToDB() once the matches have been walked.

        :param content: page text searched with the regular expression.
        :return: False after the script matches have been processed. When
            nothing matches, the result comes from the branches below that
            test ``html`` (see the review note there).
        '''
        # Strip emoji (disabled)
        # try:
        #     # UCS-4
        #     highpoints = re.compile(u'([\U00002600-\U000027BF])|([\U0001f300-\U0001f64F])|([\U0001f680-\U0001f6FF])')
        # except re.error:
        #     # UCS-2
        #     highpoints = re.compile(u'([\u2600-\u27BF])|([\uD83C][\uDF00-\uDFFF])|([\uD83D][\uDC00-\uDE4F])|([\uD83D][\uDE80-\uDEFF])')
        # content = highpoints.sub(u'\u25FD', content)

        pattern = re.compile(
            r'<script>STK && STK\.pageletM && STK\.pageletM\.view\((.*)\).*?</script>'
        )
        result = pattern.findall(content)
        if result:
            # Walk the matches and extract the JSON data
            for i in range(len(result)):
                strContent = result[i]

                # Only the payload that mentions "pl_weibo_direct" is parsed
                if '"pl_weibo_direct"' in strContent:
                    decodejson = json.loads(strContent)
                    htmlDoc = decodejson['html']

                    # result2 = open('result2.html', 'w')
                    # print >> result2, htmlDoc

                    # For printing to the terminal
                    # reload(sys)
                    # sys.setdefaultencoding('gb18030')

                    soup = BeautifulSoup(htmlDoc)
                    li = soup.find_all('div',
                                       {'action-type': 'feed_list_item'})
                    # NOTE: this inner loop reuses the outer index name `i`;
                    # harmless only because of the `break` after it.
                    for i in range(len(li)):
                        soupi = li[i]
                        weibo = WeiboBean()

                        weibo.mid = soupi['mid']

                        soupii = soupi.find('div',
                                            {'class': 'feed_content wbcon'})
                        weibo.name = soupii.a['nick-name']
                        weibo.userurl = soupii.a['href']
                        weibo.content = soupii.p.get_text()
                        weibo.weibourl = weibo.userurl + '/' + weibomid.midToStr(
                            weibo.mid)
                        # Removes every '/u' substring from the built URL
                        weibo.weibourl = weibo.weibourl.replace('/u', '')

                        # logInfo(weibo)
                        self.weibolist.append(weibo)

                    # Stop after the first matching payload
                    break

            logInfo('weibolist size = ' + str(len(self.weibolist)))
            # self.analyze()
            self.saveToDB()
            return False
        # NOTE(review): `html` is not defined anywhere in this method as
        # shown (the parameter is `content`), so reaching these branches
        # would raise NameError unless `html` exists at module level. They
        # look like they belong to a cookie-validation routine -- confirm.
        elif '您的帐号存在异常' in html and '解除限制' in html:
            msg = u'账号被限制.'
            logError(msg)

            self.clear_cookiedat(self.cookiefile)
            return False
        elif "$CONFIG['islogin'] = '******'" in html:
            msg = u'登录失败.'
            logError(msg)

            self.clear_cookiedat(self.cookiefile)
            return False
        elif "$CONFIG['islogin']='******'" in html:
            msg = 'cookie success.'
            logInfo(msg)

            # Persist the cookie jar to the cookie file
            self.cj.save(self.cookiefile, True, True)
            return True
        else:
            msg = u'登录失败.'
            logError(msg)

            self.clear_cookiedat(self.cookiefile)
            return False


    def get_response_content(self, url, headers = {}, data = None):
        '''
        获取响应数据
        '''
Esempio n. 10
0
            return False
        elif '您的帐号存在异常' in html and '解除限制' in html:
            msg = u'账号被限制.'
            logError(msg)

            self.clear_cookiedat(self.cookiefile)
            return False
        elif "$CONFIG['islogin'] = '******'" in html:
            msg = u'登录失败.'
            logError(msg)

            self.clear_cookiedat(self.cookiefile)
            return False
        elif "$CONFIG['islogin']='******'" in html:
            msg = 'cookie success.'
            logInfo(msg)

            self.cj.save(self.cookiefile, True, True)
            return True
        else:
            msg = u'登录失败.'
            logError(msg)

            self.clear_cookiedat(self.cookiefile)
            return False

    def get_response_content(self, url, headers={}, data=None):
        '''
        获取响应数据
        '''
        content = ''
Esempio n. 11
0
def run():
    '''
    Log in to Sina Weibo and, on success, start one search thread.

    Credentials and settings are read from ``syscontext`` with built-in
    fallback values. Nothing is returned; the outcome is only logged.
    '''
    # Simulated login: gather credentials and settings.
    account = syscontext.user.get('un', 'wwang1969@126')
    secret = syscontext.user.get('pw', 'w196988')
    work_dir = syscontext.config.get('temp', './temp')
    proxy_url = syscontext.config.get('httpproxy',
                                      'http://web-proxy.oa.com:8080')

    # Company network: requests must go through the proxy.
    sina = LoginSinaWeibo(soft_path=work_dir, proxy=proxy_url)

    if not sina.check_cookie(account, secret, work_dir):
        logInfo('sina weibo login failure, check username/password!')
        return

    logInfo('sina weibo login sucess!')

    # Range strings are today's date followed by '0' and '23'.
    day_prefix = time.strftime("%Y-%m-%d-", time.localtime())
    worker = SearchWeiboThread(1, day_prefix + '0', day_prefix + '23', sina)
    worker.start()


# Script entry point: log a greeting, then run the login-and-search flow.
if __name__ == '__main__':
    logInfo('hello')
    run()