Esempio n. 1
0
    def parse_dialog_info_old(self, dialog):
        """
        Parse one private message in the legacy format and store it.

        Queues a download of the sender's avatar (when a URL is available)
        and inserts one row into ``TBL_PRCD_WEIBO_PRIVATEMSG_INFO``.

        :param dialog: dict with a ``direct_message`` entry (``id``,
            ``created_at``, sender/recipient ids and screen names, ``text``
            and optionally ``page_info`` / ``url_struct``) and an optional
            ``user`` entry carrying ``avatar_large``.
        :return: None
        :raises KeyError: if a mandatory ``direct_message`` field is missing.
        :raises ValueError: if ``created_at`` does not match the expected
            ``'%a %b %d %H:%M:%S %z %Y'`` layout.
        """
        message = dialog['direct_message']

        message_id = message['id']
        sent_at = datetime.datetime.strptime(message['created_at'],
                                             '%a %b %d %H:%M:%S %z %Y')
        # Format explicitly instead of cutting str(datetime) at the first
        # '+': with a negative UTC offset there is no '+', find() returns -1
        # and the slice would leave a mangled offset in the stored value.
        created_at = sent_at.strftime('%Y-%m-%d %H:%M:%S')

        sender_id = message['sender_id']
        recipient_id = message['recipient_id']
        sender_screen_name = message['sender_screen_name']
        recipient_screen_name = message['recipient_screen_name']
        # Double quotes are blanked out before the text is put into the SQL.
        content = message['text'].replace('"', ' ')

        friends_relpath = '/WeiBo/picture/{}/friends/'.format(
            self.weiboapi.uid)

        sender_img_name = '{}.jpg'.format(sender_id)
        sender_image_path = friends_relpath + sender_img_name
        sender_image_abspath = self.download_dir + friends_relpath
        # The 'user' entry may be absent; treat that as "no avatar".
        sender_img_url = (dialog.get('user') or {}).get('avatar_large')

        recipient_img_name = '{}.jpg'.format(recipient_id)
        recipient_image_path = friends_relpath + recipient_img_name

        src_account = self.weiboapi.uid
        attach_id = ''
        attach_name = ''
        attach_url = ''

        if 'page_info' in message:
            page_info = message['page_info']
            attach_id = page_info['page_id']
            attach_name = page_info['page_title'].replace('"', ' ')
            attach_url = message['url_struct'][0]['url_type_pic']

        # Only queue the avatar when there is actually something to fetch.
        if sender_img_url:
            self.download.add_task(url=sender_img_url,
                                   path=sender_image_abspath,
                                   name=sender_img_name)

        sql = '''INSERT INTO "TBL_PRCD_WEIBO_PRIVATEMSG_INFO" ("strWeiboMsgId","strWeiboContent","strWeiboSenderAccid",
        "strWeiboSenderNick", "strSenderImage", "strWeiboReceiverAccid","strWeiboReceiverNick","strReceiverImage",
        "strWeiboSendTime","strWeiboAttachmentFid", "strWeiboAttachmentName","strWeiboAttachmentUrl","strSrcAccount")
        VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})'''
        params = (message_id, content, sender_id, sender_screen_name,
                  sender_image_path, recipient_id, recipient_screen_name,
                  recipient_image_path, created_at, attach_id, attach_name,
                  attach_url, src_account)

        self.sql_execute_try(utils.sql_format(sql, params))
Esempio n. 2
0
    def submit_userinfo(self):
        '''
        Fetch the account's user information and insert it into
        TBL_PRCD_WEIBO_USERINFO.

        Also records the nickname on ``self.nick_name``. When no user
        information is available only the account name and uid are stored.

        :return: None
        '''
        self.log.info("【用户信息】")
        self.create_table('TBL_PRCD_WEIBO_USERINFO', False)

        user = self.weiboapi.userinfo
        # Check for missing data *before* touching ``user``: calling
        # ``user.get`` first would raise on None and the fallback insert
        # below would never run.
        if not user:
            self.nick_name = ''
            sql = '''INSERT INTO "TBL_PRCD_WEIBO_USERINFO" ("strWeiboAccountNum", "strWeiboAccountID") VALUES({}, {})'''

            params = (
                self.weiboapi.username,
                self.weiboapi.uid,
            )

            self.sql_execute_try(utils.sql_format(sql, params))
            return

        self.nick_name = user.get('nick', '')

        sql = '''INSERT INTO "TBL_PRCD_WEIBO_USERINFO" ("strWeiboNickName", "strWeiboAccountNum", "strWeiboAccountID",
        "strWeiboGender","strUserDesc","strPhotoPath","strWeiboFaceURL","strWeiboCity","strWeiboBlog","strCreateAt",
        "strSunRank","strAttNum","strBlogNum","strFansNum") VALUES({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})'''

        # One value per column, in the column order of the statement above.
        params = (
            user.get('nick', ''),
            self.weiboapi.username,
            user.get('uid', ''),
            user.get('gender', ''),
            user.get('description', ''),
            user.get('profile_image_path', ''),
            user.get('profile_url', ''),
            user.get('address', ''),
            user.get('blog', ''),
            user.get('created_at', ''),
            user.get('urank', ''),
            user.get('follow_count', ''),
            user.get('statuses_count', ''),
            user.get('followers_count', ''),
        )

        self.sql_execute_try(utils.sql_format(sql, params))
Esempio n. 3
0
    def parse_weibo_userinfo(self, userinfo):
        """
        Turn a Sina Weibo user-info dict into an INSERT statement.

        Side effects: stores the screen name on ``self.nick_name`` and queues
        a download of the large avatar, saved as ``<idstr>.jpg`` in the
        account's ``friends`` picture directory.

        :param userinfo: dict of user fields (``idstr``, ``screen_name``,
            ``province``, ``city``, ``description``, ``url``, ``gender``,
            the ``*_count`` fields, ``created_at``, ``avatar_large``);
            missing keys default to an empty string.
        :return: the formatted SQL produced by ``utils.sql_format`` for
            ``TBL_PRCD_WEIBO_USERINFO``.
        """
        uid = userinfo.get('idstr', '')
        screen_name = userinfo.get('screen_name', '')
        self.nick_name = screen_name

        address = userinfo.get('province', '') + ' ' + userinfo.get('city', '')
        description = userinfo.get('description', '')
        blog_url = userinfo.get('url', '')
        gender = userinfo.get('gender', '')
        followers_count = userinfo.get('followers_count', '')
        friends_count = userinfo.get('friends_count', '')
        statuses_count = userinfo.get('statuses_count', '')
        created_at = utils.time_format(userinfo.get('created_at', ''))

        # URL of the large avatar, with any embedded spaces removed.
        avatar_large = userinfo.get('avatar_large', '').replace(' ', '')

        # format() instead of ``uid + '.jpg'`` so a non-string id cannot
        # raise TypeError (the follower parser uses str(uid) the same way).
        profile_image_name = '{}.jpg'.format(uid)
        friends_relpath = "/WeiBo/picture/{}/friends/".format(
            self.weiboapi.uid)
        profile_image_relpath = friends_relpath + profile_image_name
        profile_image_path = self.download_dir + friends_relpath

        self.download.add_task(url=avatar_large,
                               path=profile_image_path,
                               name=profile_image_name)

        profile_url = 'https://m.weibo.cn/u/{}'.format(uid)

        sql = '''INSERT INTO "TBL_PRCD_WEIBO_USERINFO" ("strWeiboNickName", "strWeiboAccountNum", "strWeiboAccountID",
                "strWeiboGender","strUserDesc","strPhotoPath","strWeiboFaceURL","strWeiboCity","strWeiboBlog","strCreateAt",
                "strSunRank","strAttNum","strBlogNum","strFansNum") VALUES({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})'''

        # "strSunRank" has no counterpart in this payload and is left empty.
        # NOTE(review): "strWeiboFaceURL" receives the profile page URL, not
        # the avatar URL - confirm this is what consumers expect.
        params = (screen_name, self.weiboapi.username, self.weiboapi.uid,
                  gender, description, profile_image_relpath, profile_url,
                  address, blog_url, created_at, '', friends_count,
                  statuses_count, followers_count)

        return utils.sql_format(sql, params)
Esempio n. 4
0
    def parse_weibo_follower(self, fans):
        """
        Turn one follower entry into an INSERT statement.

        Side effect: queues a download of the follower's large avatar, saved
        as ``<id>.jpg`` in the account's ``friends`` picture directory.

        :param fans: dict with a ``desc1`` description and a nested ``user``
            dict (``id``, ``screen_name``, ``avatar_large``,
            ``followers_count``, ``friends_count``, ``statuses_count``);
            missing keys default to an empty string.
        :return: the formatted SQL produced by ``utils.sql_format`` for
            ``TBL_PRCD_WEIBO_FOLLOW_INFO``.
        """
        desc = fans.get('desc1', '')

        # Tolerate both a missing and a null "user" entry.
        user = fans.get('user') or {}

        uid = user.get('id', '')
        screen_name = user.get('screen_name', '')
        # URL of the large avatar, with any embedded spaces removed.
        avatar_large = user.get('avatar_large', '').replace(' ', '')
        followers_count = user.get('followers_count', '')
        friends_count = user.get('friends_count', '')
        # The other user parsers in this file read the post counter from
        # 'statuses_count'; the former 'status_count' spelling is kept only
        # as a fallback for payloads that might really use it.
        status_count = user.get('statuses_count',
                                user.get('status_count', ''))

        profile_image_name = '{}.jpg'.format(uid)
        friends_relpath = "/WeiBo/picture/{}/friends/".format(
            self.weiboapi.uid)
        profile_image_relpath = friends_relpath + profile_image_name
        profile_image_path = self.download_dir + friends_relpath

        self.download.add_task(url=avatar_large,
                               path=profile_image_path,
                               name=profile_image_name)

        profile_url = 'https://m.weibo.cn/u/{}'.format(uid)

        sql = '''
              INSERT INTO "TBL_PRCD_WEIBO_FOLLOW_INFO" ("strWeiboNickname", "strPhotoPath", "strWeiboFaceURL","strPageURL","strUserDesc",
              "strStatusCount","strWeiboFollowerAmount","strWeiboFollowingAmount","strWeiboUID","strWeiboRank", "strSrcAccount")
               VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}) 
               '''

        # "strWeiboRank" has no counterpart in this payload and is left empty.
        # NOTE(review): the profile page URL is stored in both
        # "strWeiboFaceURL" and "strPageURL" - confirm this is intended.
        params = (screen_name, profile_image_relpath, profile_url, profile_url,
                  desc, status_count, followers_count, friends_count, uid, '',
                  self.weiboapi.uid)

        return utils.sql_format(sql, params)
Esempio n. 5
0
    def parse_dialog_info_new(self, msgs, name_dict, portrait_dict):
        """
        Parse private messages in the newer format and store each one.

        Entries of ``msgs`` that are not dicts are skipped. Every remaining
        message is inserted into ``TBL_PRCD_WEIBO_PRIVATEMSG_INFO``; the
        attachment columns are always left empty for this format.

        :param msgs: list of message dicts (``mid``, ``time``, ``from``,
            ``to``, ``content``); anything that is not a list is ignored.
        :param name_dict: maps a user id (as str) to its screen name.
        :param portrait_dict: maps a user id (as str) to its avatar path.
        :return: None
        """
        if not isinstance(msgs, list):
            return

        # The statement text does not depend on the message, build it once.
        sql = '''INSERT INTO "TBL_PRCD_WEIBO_PRIVATEMSG_INFO" ("strWeiboMsgId","strWeiboContent","strWeiboSenderAccid",
            "strWeiboSenderNick", "strSenderImage", "strWeiboReceiverAccid","strWeiboReceiverNick","strReceiverImage",
            "strWeiboSendTime","strWeiboAttachmentFid", "strWeiboAttachmentName","strWeiboAttachmentUrl","strSrcAccount")
            VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})'''

        for info in msgs:
            if not isinstance(info, dict):
                continue

            msg_id = info.get('mid')
            try:
                # The epoch value is rendered as a UTC wall-clock time.
                sent_at = datetime.datetime.fromtimestamp(
                    info.get('time'), tz=datetime.timezone.utc)
                created_at = sent_at.strftime('%Y-%m-%d %H:%M:%S')
            except (TypeError, ValueError, OverflowError, OSError):
                # A missing or unusable timestamp must not abort the whole
                # batch; keep the message and leave its send time empty.
                created_at = ''

            sender_id = str(info.get('from'))
            sender_screen_name = name_dict.get(sender_id)
            sender_image_path = portrait_dict.get(sender_id)
            recipient_id = str(info.get('to'))
            recipient_screen_name = name_dict.get(recipient_id)
            recipient_image_path = portrait_dict.get(recipient_id)
            content = info.get('content')

            # No attachment data is extracted from this message format.
            src_account = self.weiboapi.uid
            attach_id = ''
            attach_name = ''
            attach_url = ''

            params = (msg_id, content, sender_id, sender_screen_name,
                      sender_image_path, recipient_id, recipient_screen_name,
                      recipient_image_path, created_at, attach_id, attach_name,
                      attach_url, src_account)

            self.sql_execute_try(utils.sql_format(sql, params))
Esempio n. 6
0
    def submit_api_photos_info(self):
        '''
        Collect the photos of the account's album.

        Pages through ``weiboapi.get_api_photos_list`` until it reports a
        next id of 0. Every picture carrying a ``pic_id`` inside a card of
        type 47 is downloaded and recorded in TBL_PRCD_WEIBO_PHOTO_INFO;
        a picture whose download fails is logged and skipped.
        :return:
        '''
        self.log.info("【照片列表】")
        self.create_table('TBL_PRCD_WEIBO_PHOTO_INFO', False)

        insert_sql = '''INSERT INTO "TBL_PRCD_WEIBO_PHOTO_INFO" ("PhotoID","PhotoURL","LocalPath","MblogID","MblogText","strSrcAccount") 
                                    VALUES({}, {}, {}, {}, {}, {})'''

        cursor = 0
        while True:
            cards, following = self.weiboapi.get_api_photos_list(cursor)
            for card in cards:
                # Only cards of type 47 are scanned for pictures.
                if card['card_type'] != 47:
                    continue
                for photo in card['pics']:
                    if 'pic_id' not in photo:
                        continue

                    photo_id = photo['pic_id']
                    photo_url = photo['pic_big']
                    post_id = photo['mblog']['id']
                    post_text = photo['mblog']['text']
                    file_name = str(photo_id) + '.jpg'
                    rel_path = '/WeiBo/picture/{}/photos/{}'.format(
                        self.weiboapi.uid, file_name)
                    target_dir = self.download_dir + '/WeiBo/picture/{}/photos/'.format(
                        self.weiboapi.uid)

                    try:
                        download_file(photo_url, target_dir, file_name)
                    except Exception as err:
                        # No row is written for a picture that was not saved.
                        self.log.error("照片下载你失败:[{}] {}".format(
                            photo_url, err))
                        continue

                    row = (photo_id, photo_url, rel_path, post_id,
                           post_text, self.weiboapi.uid)
                    self.sql_execute_try(utils.sql_format(insert_sql, row))

            if following == 0:
                break
            cursor = following
        self.download.download_wait()
Esempio n. 7
0
    def submit_bloginfo(self):
        '''
        Fetch the account's posts page by page and insert each one into
        TBL_PRCD_WEIBO_BLOGINFO.

        The number of pages comes from ``weiboapi.weibo_amount``. A post
        that cannot be stored is logged and skipped; the remaining posts
        are still processed.

        :return: None
        '''
        self.log.info("【微博信息】")
        self.create_table('TBL_PRCD_WEIBO_BLOGINFO', False)

        sql = '''INSERT INTO "TBL_PRCD_WEIBO_BLOGINFO" ("strWeiboSessionID","strWeiboContent","strWeiboSendTime","strSrcAccount",
                "strWeiboSenderNick","strSource","strWeiboAttiCount","strWeiboForwardCount","strWeiboCommentCount",
                "strPrevCreatedAt","strPrevMblogID","strPrevContent","strPrevUID","strPrevSource") VALUES
                ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})
                '''

        page_total, _blog_total = self.weiboapi.weibo_amount

        for page in range(1, page_total + 1):
            _count, one_list = self.weiboapi.get_weibo_onepage(page)
            for mblog in one_list:
                try:
                    # One value per column, in the column order above. The
                    # collecting account belongs in "strSrcAccount" and the
                    # post's 'from' value in "strSource"; the two used to be
                    # passed the other way round.
                    params = (
                        mblog.get('mblog_id', ''),
                        mblog.get('content', ''),
                        mblog.get('created_at', ''),
                        self.weiboapi.account,
                        mblog.get('nick', ''),
                        mblog.get('from', ''),
                        mblog.get('attitudes_count', ''),
                        mblog.get('reposts_count', ''),
                        mblog.get('comments_count', ''),
                        mblog.get('prev_createdat', ''),
                        mblog.get('prev_id', ''),
                        mblog.get('prev_content', ''),
                        mblog.get('prev_uid', ''),
                        mblog.get('prev_source', ''),
                    )
                    self.sql_execute_try(utils.sql_format(sql, params))
                except Exception as e:
                    # Best effort: report the offending post and carry on.
                    self.log.error("获取用户【微博信息】异常. {}\r\n 错误信息:{}".format(
                        str(mblog), e))
                    continue
Esempio n. 8
0
    def submit_follower_info(self):
        '''
        Fetch the account's followers page by page and insert each one into
        TBL_PRCD_WEIBO_FOLLOW_INFO.

        For every follower an avatar download is queued first; a follower
        that cannot be processed is logged and skipped. Once all pages are
        done the method waits for the queued downloads.
        :return:
        '''
        self.log.info("【在关注者信息】")
        self.create_table('TBL_PRCD_WEIBO_FOLLOW_INFO', False)

        insert_sql = '''
            INSERT INTO "TBL_PRCD_WEIBO_FOLLOW_INFO" ("strWeiboNickname","strPhotoPath","strPageURL","strUserDesc",
            "strStatusCount","strWeiboFollowerAmount","strWeiboFollowingAmount","strWeiboUID","strWeiboRank", "strSrcAccount")
             VALUES ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}) 
             '''
        # Follower fields in the column order of the statement above; the
        # collecting account's uid is appended as the last value.
        row_keys = ('nick', 'profile_image_path', 'profile_url', 'desc',
                    'statuses_count', 'followers_count', 'follow_count',
                    'uid', 'rank')

        last_page, _ = self.weiboapi.follower_amount
        for page_no in range(1, last_page + 1):
            _, followers = self.weiboapi.get_follower_onepage(page_no)
            for fans in followers:
                try:
                    avatar_dir = self.download_dir + "/WeiBo/picture/{}/friends/".format(
                        self.weiboapi.uid)
                    self.download.add_task(
                        url=fans.get('profile_image_url', ''),
                        path=avatar_dir,
                        name=fans.get('profile_image_name', ''))

                    row = tuple(fans[key] for key in row_keys)
                    row += (self.weiboapi.uid, )

                    self.sql_execute_try(utils.sql_format(insert_sql, row))
                except Exception as err:
                    self.log.error("获取用户【粉丝信息】异常. {}\r\n 错误信息:{}".format(
                        str(fans), err))
                    continue
        self.download.download_wait()
Esempio n. 9
0
    def fetch_weibo_userinfo(self):
        """
        Fetch the Sina Weibo profile of the current account and store it.

        If the lookup raises, the exception is logged and treated like an
        empty profile. An empty profile results in a minimal row holding
        only the account name and uid; otherwise the statement is built by
        ``parse_weibo_userinfo``.
        :return:
        """

        self.log.info("【用户信息】")
        self.create_table('TBL_PRCD_WEIBO_USERINFO', False)

        try:
            profile = self.weiboapi.weibo_userinfo()
        except Exception as err:
            profile = {}
            self.log.exception(
                "fetch weibo userinfo exception. errorinfo:{}".format(err))

        if not profile:
            template = '''INSERT INTO "TBL_PRCD_WEIBO_USERINFO" ("strWeiboAccountNum", "strWeiboAccountID") VALUES({}, {})'''
            account = (self.weiboapi.username, self.weiboapi.uid)
            statement = utils.sql_format(template, account)
        else:
            statement = self.parse_weibo_userinfo(profile)

        self.sql_execute_try(statement)
Esempio n. 10
0
    def parse_weibo_mblog(self, mblog: dict):
        """
        Turn one post into an INSERT statement for TBL_PRCD_WEIBO_BLOGINFO.

        Side effect: queues a download for the large version of every
        picture of the post and of the post it forwards (if any), saved as
        ``<picture key>.jpg`` in the account's ``photos`` directory.

        :param mblog: post dict (``id``, ``text``, ``source``,
            ``created_at``, the ``*_count`` fields, ``user``, ``pic_infos``
            and optionally ``retweeted_status`` with the same layout);
            missing or null entries are treated as empty.
        :return: the formatted SQL produced by ``utils.sql_format``.
        """
        photos_relpath = '/WeiBo/picture/{}/photos/'.format(self.weiboapi.uid)
        photos_abspath = self.download_dir + photos_relpath

        def queue_pictures(pic_infos):
            """Queue each large picture; return the quoted relative paths."""
            quoted_paths = []
            for pic_key, pic_info in pic_infos.items():
                large_url = pic_info.get('large', {}).get('url', '')
                large_name = pic_key + '.jpg'
                self.download.add_task(url=large_url.replace(' ', ''),
                                       path=photos_abspath,
                                       name=large_name)
                # Each path is wrapped in double quotes and ';'-terminated.
                quoted_paths.append('"{}";'.format(photos_relpath +
                                                   large_name))
            return ''.join(quoted_paths)

        mblog_id = mblog.get('id', '')
        content = mblog.get('text', '')
        source = mblog.get('source', '')
        created_at = utils.time_format(mblog.get('created_at', ''))
        reposts_count = mblog.get('reposts_count', '')
        comments_count = mblog.get('comments_count', '')
        attitudes_count = mblog.get('attitudes_count', '')

        sender_nick = (mblog.get('user') or {}).get('screen_name', '')
        attachments_name = queue_pictures(mblog.get('pic_infos') or {})

        # The forwarded ("previous") post; an empty dict when there is none,
        # in which case every prev_* value falls back to its default.
        retweeted_status = mblog.get('retweeted_status') or {}
        prev_created_at = utils.time_format(
            retweeted_status.get('created_at', ''))
        prev_mblog_id = retweeted_status.get('id', '')
        prev_content = retweeted_status.get('text', '')
        prev_sender_uid = (retweeted_status.get('user')
                           or {}).get('idstr', '')
        prev_source = retweeted_status.get('source', '')
        prev_attachments_name = queue_pictures(
            retweeted_status.get('pic_infos') or {})

        sql = '''INSERT INTO "TBL_PRCD_WEIBO_BLOGINFO" ("strWeiboSessionID","strWeiboContent","strWeiboAttachmentName",
                 "strWeiboAttachmentLocalPath","strWeiboSendTime","strSrcAccount","strWeiboSenderNick","strSource",
                 "strWeiboAttiCount","strWeiboForwardCount","strWeiboCommentCount",
                 "strPrevCreatedAt","strPrevMblogID","strPrevContent","strPrevUID","strPrevSource") VALUES
                 ({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})
            '''

        # NOTE(review): "strWeiboAttachmentLocalPath" receives the picture
        # list of the forwarded post - confirm this mapping is intended.
        params = (mblog_id, content, attachments_name, prev_attachments_name,
                  created_at, self.weiboapi.uid, sender_nick, source,
                  attitudes_count, reposts_count, comments_count,
                  prev_created_at, prev_mblog_id, prev_content,
                  prev_sender_uid, prev_source)

        return utils.sql_format(sql, params)