def get_first_mood(self, mood_num):
    """Record when the user's earliest mood (post) was published.

    Requests the last page of the user's mood list (pages hold 20 moods),
    takes the final entry of the returned ``msglist`` and stores its
    ``createTime`` in ``self.user_info.first_mood_time``. The difference
    between the current year and the year parsed from that ``createTime``
    is stored in ``self.user_info.years``.

    On any failure ``first_mood_time`` is reset to ``''`` and the error is
    reported through ``self.format_error``. ``self.user_info.save_user()``
    is called in every case.

    :param mood_num: total number of moods the user has published
    :return: None
    """
    page_size = 20
    try:
        # Clamp at page 0: with mood_num == 0 the unclamped formula gives -1
        # and the request would be sent with a negative offset.
        last_page = max(math.ceil(mood_num / page_size) - 1, 0)
        pos = page_size * last_page
        url_mood = self.get_mood_url() + '&uin=' + str(self.username)
        url = url_mood + '&pos=' + str(pos)
        mood_list = self.req.get(url=url, headers=self.h5_headers, timeout=20)
        if self.debug:
            print("第一次动态发表时间:", mood_list.status_code)
        try:
            json_content = json.loads(
                self.get_json(str(mood_list.content.decode('utf-8'))))
        except Exception:
            # Decoding/parsing the raw bytes failed; retry on the text body
            # after passing it through remove_special_tag.
            json_content = json.loads(
                self.get_json(str(remove_special_tag(mood_list.text))))
        # The last entry of the last page is the oldest mood.
        last_mood = json_content['msglist'][-1]
        today = int(datetime.datetime.now().year)
        self.user_info.first_mood_time = last_mood['createTime']
        # The year is the part of createTime before the '年' character.
        first_year = int(last_mood['createTime'].split('年')[0])
        self.user_info.years = today - first_year
        if self.debug:
            print(self.user_info.years)
    except Exception as e:
        # Best effort: leave the field empty and report, but let
        # KeyboardInterrupt / SystemExit propagate.
        self.user_info.first_mood_time = ''
        self.format_error(e, "Failed to get first send mood time")
    finally:
        self.user_info.save_user()
def get_json_content(self, url):
    """Fetch ``url`` and return its body after ``self.get_json`` processing.

    The response bytes are decoded as UTF-8 first. If that attempt raises,
    the response text is passed through ``remove_special_tag`` and handed
    to ``self.get_json`` instead.

    :param url: address to request with ``self.h5_headers``
    :return: whatever ``self.get_json`` returns for the response body
    """
    response = self.req.get(url=url, headers=self.h5_headers, timeout=20)
    try:
        return self.get_json(str(response.content.decode('utf-8')))
    except BaseException:
        # Fall back to the text body with special tags removed.
        return self.get_json(remove_special_tag(response.text))
def get_mood_num(self):
    """Return the number of moods reported for the user.

    Requests the first page (``pos=0``) of the user's mood list and reads
    ``usrinfo.msgnum`` from the JSON produced by ``self.get_json``.

    If the response bytes cannot be decoded as UTF-8, the failure is
    reported through ``self.format_error`` and the response text, passed
    through ``remove_special_tag``, is used instead.

    :return: the ``msgnum`` value from the response's ``usrinfo`` object
    """
    first_page_url = ''.join(
        [self.get_mood_url(), '&uin=', str(self.username), '&pos=0'])
    response = self.req.get(
        url=first_page_url, headers=self.h5_headers, timeout=20)

    try:
        raw_body = response.content.decode('utf-8')
    except BaseException as decode_error:
        if self.debug:
            print("该用户信息存在乱码")
        self.format_error(decode_error, "Bad decode exists in user info")
        raw_body = remove_special_tag(response.text)

    if self.debug:
        print("获取主页动态数量的状态码:", response.status_code)

    parsed = json.loads(self.get_json(raw_body))
    return parsed['usrinfo']['msgnum']
def clean_friend_data(self):
    """Clean the raw friend data and export it as a table.

    Loads the friend list via ``self.load_friend_data()`` when it is empty,
    then builds one record per entry of ``self.friend_detail`` (friend uin,
    time the friend was added, nickname, avatar image, common friends and
    groups) into ``self.friend_detail_list``. An entry that cannot be
    parsed is kept as a placeholder record named '单向好友' ("one-way
    friend") instead of aborting the run.

    The records are sorted by ``add_friend_time``, given an
    ``add_friend_time2`` column computed by
    ``util.get_full_time_from_mktime``, written to Excel and/or CSV when
    ``self.export_excel`` / ``self.export_csv`` are set, and stored in
    ``self.friend_df``.

    :return: True when the table was built; False when the user has no
        friends or any step failed (the failure is reported through
        ``self.format_error``).
    """
    try:
        if not self.friend_list:
            self.load_friend_data()
        friend_total_num = len(self.friend_list)
        print("valid friend num:", friend_total_num)
        self.friend_detail_list = []
        if friend_total_num == 0:
            print("该用户没有好友")
            return False

        # Lookup table for each friend's avatar, keyed by 'uin'.
        friend_list_df = pd.DataFrame(self.friend_list)
        for friend in self.friend_detail:
            try:
                friend_uin = friend['friendUin']
                add_friend_time = friend['addFriendTime']
                img = friend_list_df.loc[
                    friend_list_df['uin'] == friend_uin, 'img'].values[0]
                nick = friend['nick']
                nick_name = remove_special_tag(nick[str(friend_uin)])
                common_group_names = friend['common']['group']
                self.friend_detail_list.append(dict(
                    uin=self.username,
                    friend_uin=friend_uin,
                    add_friend_time=add_friend_time,
                    nick_name=nick_name,
                    common_friend_num=len(friend['common']['friend']),
                    common_group_num=len(common_group_names),
                    common_group_names=common_group_names,
                    img=img,
                ))
            except Exception:
                if self.debug:
                    print("单向好友:", friend)
                # The failure may be the missing 'friendUin' key itself, so
                # read it defensively; indexing here would raise inside the
                # handler and abort the cleaning of every friend.
                if isinstance(friend, dict):
                    fallback_uin = friend.get('friendUin', 0)
                else:
                    fallback_uin = 0
                self.friend_detail_list.append(dict(
                    uin=0,
                    friend_uin=fallback_uin,
                    add_friend_time=0,
                    nick_name='单向好友',
                    common_friend_num=0,
                    common_group_num=0,
                    common_group_names='',
                    img='',
                ))

        friend_df = pd.DataFrame(self.friend_detail_list)
        friend_df.sort_values(by='add_friend_time', inplace=True)
        friend_df['add_friend_time2'] = friend_df['add_friend_time'].apply(
            lambda x: util.get_full_time_from_mktime(x))
        friend_df.fillna('', inplace=True)
        if self.export_excel:
            friend_df.to_excel(self.FRIEND_DETAIL_EXCEL_FILE_NAME)
        if self.export_csv:
            friend_df.to_csv(self.FRIEND_DETAIL_LIST_FILE_NAME)
        if self.debug:
            print("Finish to clean friend data...")
            print("File Name:", self.FRIEND_DETAIL_LIST_FILE_NAME)
        self.friend_df = friend_df
        return True
    except Exception as e:
        # Best effort: report and signal failure, but let
        # KeyboardInterrupt / SystemExit propagate.
        self.format_error(e, "Failed to parse friend_info")
        return False