def get_main_page_info(self):
    """Fetch the main-page counters and the first-login time of the account.

    Two requests are sent to the URLs returned by
    ``self.get_main_page_url()``:

    1. The first response supplies the ``SS``, ``XC`` and ``RZ`` counters,
       stored as ``user_info.mood_num``, ``user_info.photo_num`` and
       ``user_info.rz_num``.  ``self.mood_num`` is replaced by the fetched
       mood count only while it is still ``-1``.  When ``self.use_redis``
       is set, the mood count and progress messages are pushed to redis,
       and the mood-count key gets an expiry unless ``self.no_delete`` is
       set.
    2. The second response supplies ``firstlogin``.  It is converted with
       ``util.get_standard_time_from_mktime`` and the difference between
       the current year and the first-login year is stored in
       ``user_info.years``.

    Each step is best-effort: an ``Exception`` raised inside a step is
    handed to ``self.format_error`` (and reported to redis when enabled)
    and does not stop the other step from running.  ``KeyboardInterrupt``
    and ``SystemExit`` are intentionally *not* intercepted, so the process
    can still be stopped while a request is in flight.

    Side effect: overwrites ``self.headers['referer']``.
    """
    url, url2 = self.get_main_page_url()

    # Step 1: mood / photo / RZ counters from the main page.
    try:
        res = self.req.get(url=url, headers=self.headers)
        if self.debug:
            print("主页信息状态:", res.status_code)
        content = json.loads(self.get_json(res.content.decode("utf-8")))
        data = content['data']['module_16']['data']
        self.user_info.mood_num = data['SS']
        self.user_info.photo_num = data['XC']
        self.user_info.rz_num = data['RZ']
        # Keep a mood count that was already set; -1 means "not set yet".
        if self.mood_num == -1:
            self.mood_num = self.user_info.mood_num
        if self.use_redis:
            mood_key = MOOD_NUM_KEY + self.username
            info_key = WEB_SPIDER_INFO + self.username
            self.re.set(mood_key, self.mood_num)
            self.re.rpush(info_key, "获取主页信息成功")
            self.re.rpush(info_key, MOOD_NUM_PRE + ":" + str(self.mood_num))
            if not self.no_delete:
                self.re.expire(mood_key, EXPIRE_TIME_IN_SECONDS)

        if self.debug:
            print(self.user_info.mood_num)
            print("Finish to get main page info")

    # Exception (not BaseException): Ctrl-C / SystemExit must propagate.
    except Exception as e:
        self.format_error(e, "Failed to get main page info")
        if self.use_redis:
            self.re.rpush(WEB_SPIDER_INFO + self.username,
                          GET_MAIN_PAGE_FAILED)

    # Step 2: first-login time and the number of years since then.
    try:
        self.headers['referer'] = (
            'https://user.qzone.qq.com/' + self.raw_username + '/main')
        res = self.req.get(url=url2, headers=self.headers)
        if self.debug:
            print("获取登陆时间状态:", res.status_code)
        content = json.loads(self.get_json(res.content.decode("utf-8")))
        data = content['data']
        self.user_info.first_time = util.get_standard_time_from_mktime(
            data['firstlogin'])
        # first_time is expected to start with the year followed by '-'.
        first_year = int(self.user_info.first_time.split('-')[0])
        self.user_info.years = int(datetime.datetime.now().year) - first_year
        if self.debug:
            print("Finish to get first time")
        print("Success to Get Main Page Info!")
    except Exception as e:
        self.format_error(e, "Failed to get first login time")
        if self.use_redis:
            self.re.rpush(WEB_SPIDER_INFO + self.username,
                          GET_FIRST_LOGIN_TIME)
 def calculate_friend_num_timeline(self, timestamp, friend_df):
     """Return how many friends the user had at the given point in time.

     The result is the total number of rows in ``friend_df`` minus the
     rows whose ``add_friend_time`` is strictly greater than ``timestamp``.

     :param timestamp: point in time to evaluate, compared directly with
         the ``add_friend_time`` column
     :param friend_df: friend table with an ``add_friend_time`` column
     :return: number of friends at that point in time
     """
     added_later = (friend_df['add_friend_time'] > timestamp).sum()
     friends_so_far = friend_df.shape[0] - int(added_later)
     if self.debug:
         print(util.get_standard_time_from_mktime(timestamp), friends_so_far)
     return friends_so_far
Exemple #3
0
    def get_first_friend_info(self):
        """Store the first remaining friend row as the user's first friend.

        Loads ``self.friend_df`` from ``FRIEND_DETAIL_LIST_FILE_NAME`` when
        it has not been loaded yet, calls ``self.get_single_friend()``, then
        removes (in place) every row whose ``add_friend_time`` is ``0`` and
        resets the index in place.  Row ``0`` of the resulting table supplies
        the nickname, the formatted add time and the avatar path that are
        written to ``self.user_info`` before ``save_user()`` is called.

        NOTE(review): row 0 is taken to be the earliest friend, which
        presumably relies on the table already being ordered by
        ``add_friend_time`` -- confirm against the code that builds it.
        """
        if self.friend_df is None:
            self.friend_df = pd.read_csv(self.FRIEND_DETAIL_LIST_FILE_NAME)
        self.get_single_friend()

        # Drop the rows whose add time is 0, then renumber so that label 0
        # addresses the first remaining row.
        no_time_mask = self.friend_df['add_friend_time'] == 0
        self.friend_df.drop(index=self.friend_df.index[no_time_mask],
                            inplace=True)
        self.friend_df.reset_index(inplace=True)

        frame = self.friend_df
        first_time = util.get_standard_time_from_mktime(
            frame.loc[0, 'add_friend_time'])
        first_nick = frame.loc[0, 'nick_name']
        first_uin = int(frame.loc[0, 'friend_uin'])
        header_path = self.FRIEND_HEADER_IMAGE_PATH + str(first_uin) + '.jpg'

        info = self.user_info
        info.first_friend = first_nick
        info.first_friend_time = first_time
        info.first_friend_header = header_path
        info.save_user()
Exemple #4
0
    def get_main_page_info(self):
        """Fetch the main-page counters and the first-login time of the account.

        Two requests are sent to the URLs returned by
        ``self.get_main_page_url()``:

        1. The first response supplies the ``SS``, ``XC`` and ``RZ``
           counters, stored as ``user_info.mood_num``,
           ``user_info.photo_num`` and ``user_info.rz_num``.
           ``self.mood_num`` is replaced by the fetched mood count only
           while it is still ``-1``.
        2. The second response supplies ``firstlogin``.  It is converted
           with ``util.get_standard_time_from_mktime`` and the difference
           between the current year and the first-login year is stored in
           ``user_info.years``.

        Each step is best-effort: an ``Exception`` raised inside a step is
        handed to ``self.format_error`` and does not stop the other step.
        ``KeyboardInterrupt`` and ``SystemExit`` are intentionally *not*
        intercepted, so the process can still be stopped.

        Side effect: overwrites ``self.headers['referer']``.
        """
        url, url2 = self.get_main_page_url()

        # Step 1: mood / photo / RZ counters from the main page.
        try:
            res = self.req.get(url=url, headers=self.headers)
            if self.debug:
                print("主页信息状态:", res.status_code)
            content = json.loads(self.get_json(res.content.decode("utf-8")))
            data = content['data']['module_16']['data']
            self.user_info.mood_num = data['SS']
            self.user_info.photo_num = data['XC']
            self.user_info.rz_num = data['RZ']
            # Keep a mood count that was already set; -1 means "not set yet".
            if self.mood_num == -1:
                self.mood_num = self.user_info.mood_num

            if self.debug:
                print(self.user_info.mood_num)
                print("Finish to get main page info")
        # Exception (not BaseException): Ctrl-C / SystemExit must propagate.
        except Exception as e:
            self.format_error(e, "获取主页信息失败")

        # Step 2: first-login time and the number of years since then.
        try:
            # The referer used to be pinned to one fixed account. Use the
            # account being crawled when the instance knows it, and fall
            # back to the previous value otherwise.
            uin = getattr(self, 'raw_username', None) or '1272082503'
            self.headers['referer'] = (
                'https://user.qzone.qq.com/' + str(uin) + '/main')
            res = self.req.get(url=url2, headers=self.headers)
            if self.debug:
                print("获取登陆时间状态:", res.status_code)
            content = json.loads(self.get_json(res.content.decode("utf-8")))
            data = content['data']
            self.user_info.first_time = util.get_standard_time_from_mktime(
                data['firstlogin'])
            # first_time is expected to start with the year followed by '-'.
            first_year = int(self.user_info.first_time.split('-')[0])
            self.user_info.years = (
                int(datetime.datetime.now().year) - first_year)
            if self.debug:
                print("Finish to get first time")

        except Exception as e:
            self.format_error(e, "获取第一次登陆时间失败")