def get_main_page_info(self):
    """Fetch the main-page counters and the first-login time.

    Two independent requests are sent through ``self.req`` to the two URLs
    returned by ``self.get_main_page_url()``:

    1. From the first response, the ``SS``, ``XC`` and ``RZ`` entries under
       ``data -> module_16 -> data`` are stored on ``self.user_info`` as
       ``mood_num``, ``photo_num`` and ``rz_num``. ``self.mood_num`` is
       replaced by the fetched value only when it currently equals ``-1``.
       When ``self.use_redis`` is true, the mood count and progress messages
       are written to Redis; the mood-count key gets an expiry unless
       ``self.no_delete`` is set.
    2. From the second response, ``data -> firstlogin`` is converted with
       ``util.get_standard_time_from_mktime`` into
       ``self.user_info.first_time``; ``self.user_info.years`` is the
       current year minus the year at the start of that string.

    Both steps are best-effort: an error in one is reported through
    ``self.format_error`` (and pushed to Redis when enabled) and does not
    prevent the other from running. Only ``Exception`` subclasses are
    handled, so ``KeyboardInterrupt`` and ``SystemExit`` propagate instead
    of being logged as a failed fetch.

    Side effect: ``self.headers['referer']`` is overwritten before the
    second request and stays set afterwards.
    """
    url, url2 = self.get_main_page_url()

    # Step 1: counters from the main page.
    try:
        res = self.req.get(url=url, headers=self.headers)
        if self.debug:
            print("主页信息状态:", res.status_code)
        content = json.loads(self.get_json(res.content.decode("utf-8")))
        data = content['data']['module_16']['data']
        self.user_info.mood_num = data['SS']
        self.user_info.photo_num = data['XC']
        self.user_info.rz_num = data['RZ']
        # Only a value of -1 is replaced by the fetched count; any other
        # value already stored on the instance is kept.
        if self.mood_num == -1:
            self.mood_num = self.user_info.mood_num
        if self.use_redis:
            self.re.set(MOOD_NUM_KEY + self.username, self.mood_num)
            self.re.rpush(WEB_SPIDER_INFO + self.username, "获取主页信息成功")
            self.re.rpush(WEB_SPIDER_INFO + self.username,
                          MOOD_NUM_PRE + ":" + str(self.mood_num))
            if not self.no_delete:
                self.re.expire(MOOD_NUM_KEY + self.username,
                               EXPIRE_TIME_IN_SECONDS)
        if self.debug:
            print(self.user_info.mood_num)
            print("Finish to get main page info")
    except Exception as e:
        self.format_error(e, "Failed to get main page info")
        if self.use_redis:
            self.re.rpush(WEB_SPIDER_INFO + self.username,
                          GET_MAIN_PAGE_FAILED)

    # Step 2: first-login time, requested with the user's main page as
    # referer.
    try:
        self.headers['referer'] = (
            'https://user.qzone.qq.com/' + self.raw_username + '/main')
        res = self.req.get(url=url2, headers=self.headers)
        if self.debug:
            print("获取登陆时间状态:", res.status_code)
        content = json.loads(self.get_json(res.content.decode("utf-8")))
        data = content['data']
        self.user_info.first_time = util.get_standard_time_from_mktime(
            data['firstlogin'])
        # The text before the first '-' of first_time is parsed as the year.
        first_year = int(self.user_info.first_time.split('-')[0])
        current_year = datetime.datetime.now().year
        self.user_info.years = current_year - first_year
        if self.debug:
            print("Finish to get first time")
            print("Success to Get Main Page Info!")
    except Exception as e:
        self.format_error(e, "Failed to get first login time")
        if self.use_redis:
            self.re.rpush(WEB_SPIDER_INFO + self.username,
                          GET_FIRST_LOGIN_TIME)
def calculate_friend_num_timeline(self, timestamp, friend_df):
    """Return how many friends had been added by ``timestamp``.

    :param timestamp: timestamp compared directly against the values of the
        ``add_friend_time`` column
    :param friend_df: DataFrame containing an ``add_friend_time`` column
    :return: total number of rows minus the number of rows whose
        ``add_friend_time`` is greater than ``timestamp``
    """
    added_later = friend_df['add_friend_time'] > timestamp
    friends_so_far = len(friend_df.index) - int(added_later.sum())
    if self.debug:
        print(util.get_standard_time_from_mktime(timestamp), friends_so_far)
    return friends_so_far
def get_first_friend_info(self):
    """Store the first friend's details on ``self.user_info`` and save them.

    Reads the friend table from ``self.FRIEND_DETAIL_LIST_FILE_NAME`` when
    ``self.friend_df`` is ``None``, calls ``self.get_single_friend()``, and
    then removes, in place, every row whose ``add_friend_time`` equals 0.
    After the index is reset, the row labelled 0 provides the nick name, the
    formatted add time and the header-image path written to
    ``self.user_info``, which is finally persisted with ``save_user()``.

    NOTE(review): the row labelled 0 is treated as the earliest friend, so
    this presumably expects the table to be ordered by ``add_friend_time``
    already - confirm against the code that builds it.
    """
    if self.friend_df is None:
        self.friend_df = pd.read_csv(self.FRIEND_DETAIL_LIST_FILE_NAME)
    self.get_single_friend()

    friends = self.friend_df
    # Drop the rows whose add time is 0 before picking the first row.
    zero_time_labels = friends.index[friends['add_friend_time'] == 0]
    friends.drop(index=zero_time_labels, axis=0, inplace=True)
    # reset_index is called without drop=True, so the previous index is kept
    # as an extra column of the frame.
    friends.reset_index(inplace=True)

    first_added = util.get_standard_time_from_mktime(
        friends.at[0, 'add_friend_time'])
    first_nick = friends.at[0, 'nick_name']
    first_uin = int(friends.at[0, 'friend_uin'])
    header_path = self.FRIEND_HEADER_IMAGE_PATH + str(first_uin) + '.jpg'

    profile = self.user_info
    profile.first_friend = first_nick
    profile.first_friend_time = first_added
    profile.first_friend_header = header_path
    profile.save_user()
def get_main_page_info(self):
    """Fetch the main-page counters and the first-login time.

    Two independent requests are sent through ``self.req`` to the two URLs
    returned by ``self.get_main_page_url()``:

    1. From the first response, the ``SS``, ``XC`` and ``RZ`` entries under
       ``data -> module_16 -> data`` are stored on ``self.user_info`` as
       ``mood_num``, ``photo_num`` and ``rz_num``. ``self.mood_num`` is
       replaced by the fetched value only when it currently equals ``-1``.
    2. From the second response, ``data -> firstlogin`` is converted with
       ``util.get_standard_time_from_mktime`` into
       ``self.user_info.first_time``; ``self.user_info.years`` is the
       current year minus the year at the start of that string.

    Both steps are best-effort: an error in one is reported through
    ``self.format_error`` and does not prevent the other from running. Only
    ``Exception`` subclasses are handled, so ``KeyboardInterrupt`` and
    ``SystemExit`` propagate instead of being logged as a failed fetch.

    Side effect: ``self.headers['referer']`` is overwritten before the
    second request and stays set afterwards.
    """
    url, url2 = self.get_main_page_url()

    # Step 1: counters from the main page.
    try:
        res = self.req.get(url=url, headers=self.headers)
        if self.debug:
            print("主页信息状态:", res.status_code)
        content = json.loads(self.get_json(res.content.decode("utf-8")))
        data = content['data']['module_16']['data']
        self.user_info.mood_num = data['SS']
        self.user_info.photo_num = data['XC']
        self.user_info.rz_num = data['RZ']
        # Only a value of -1 is replaced by the fetched count; any other
        # value already stored on the instance is kept.
        if self.mood_num == -1:
            self.mood_num = self.user_info.mood_num
        if self.debug:
            print(self.user_info.mood_num)
            print("Finish to get main page info")
    except Exception as e:
        self.format_error(e, "获取主页信息失败")

    # Step 2: first-login time.
    try:
        # Use the instance's own account for the referer when it exposes
        # ``raw_username``; otherwise fall back to the fixed account number
        # this method always sent before.
        referer_uin = getattr(self, 'raw_username', None) or '1272082503'
        self.headers['referer'] = (
            'https://user.qzone.qq.com/' + str(referer_uin) + '/main')
        res = self.req.get(url=url2, headers=self.headers)
        if self.debug:
            print("获取登陆时间状态:", res.status_code)
        content = json.loads(self.get_json(res.content.decode("utf-8")))
        data = content['data']
        self.user_info.first_time = util.get_standard_time_from_mktime(
            data['firstlogin'])
        # The text before the first '-' of first_time is parsed as the year.
        first_year = int(self.user_info.first_time.split('-')[0])
        current_year = datetime.datetime.now().year
        self.user_info.years = current_year - first_year
        if self.debug:
            print("Finish to get first time")
    except Exception as e:
        self.format_error(e, "获取第一次登陆时间失败")