def parse_recent_visit(self, file_path, time_step):
    """Poll the main page for recent visitors and save them to an Excel file.

    Loops forever, so it must be run in its own thread. Each round fetches
    the main page, appends the visitors that are not stored yet to
    ``self.visit_list``, rewrites ``file_path`` with the accumulated list and
    then sleeps ``time_step`` seconds.

    :param file_path: Excel file that is rewritten whenever the response
        contains visitor items
    :param time_step: number of seconds to sleep between two polls
    :raises SystemExit: with code 3 as soon as any step of a poll fails
    """
    try:
        # Every poll returns the same recent visitors again. Remembering what
        # is already stored keeps self.visit_list (and the DataFrame built
        # from it on each round) from growing without bound.
        seen = {(v.get('qq'), v.get('time'), v.get('name'))
                for v in self.visit_list}
        while True:
            url, _ = self.get_main_page_url()
            res = self.req.get(url=url, headers=self.headers)
            content = json.loads(self.get_json(res.content.decode("utf-8")))
            visit_data = content['data']['module_3']['data']
            if 'items' in visit_data:
                for item in visit_data['items']:
                    record = dict(
                        qq=item['uin'],
                        time=util.get_full_time_from_mktime(item['time']),
                        name=item['name'])
                    key = (record['qq'], record['time'], record['name'])
                    if key not in seen:
                        seen.add(key)
                        self.visit_list.append(record)
                visit_df = pd.DataFrame(self.visit_list)
                # Kept as a safety net for duplicates that were already in
                # self.visit_list before this method started.
                visit_df.drop_duplicates(inplace=True)
                visit_df.to_excel(file_path, index=False)
            time.sleep(time_step)
    except Exception:
        print("获取最近访客出错")
        # Same effect as the site-provided exit(3), without depending on a
        # helper that only exists when the site module has been loaded.
        raise SystemExit(3)
def __init__(self):
    """Run the interactive QQ-zone raffle client.

    The constructor drives the whole session:

    1. asks for the polling interval of the recent-visitor export
       (falls back to 60 seconds on non-numeric input),
    2. logs in with a QR code and exits with code 1 when that fails,
    3. starts the recent-visitor polling thread,
    4. loads and prints the three most recent pages of 20 moods,
    5. loops forever: lets the user pick a mood, the raffle type and the
       number of winners, waits for the like/comment data, runs the raffle
       and writes the comment and like lists to Excel files.

    :raises SystemExit: code 1 on login failure; code 2 when an exception
        whose text is ``'2'`` reaches the main loop (presumably raised by
        ``check_input`` when the user enters Q -- confirm there).
    """
    self.sp = QQZoneSpider(use_redis=False, debug=False, from_client=True,
                           mood_num=20)
    warm_tip = "****************************************\n" \
               "**************QQ空间抽奖小程序***************\n" \
               "****************************************"
    self.output(warm_tip)
    self.output("请输入获取最近访客的时间间隔,默认为60秒")
    try:
        time_step = int(input())
    except ValueError:
        # Empty or non-numeric input: use the advertised default.
        time_step = 60
    visit_file_name = "最近访客" + get_full_time_from_mktime(
        int(time.time())) + ".xlsx"
    self.output("最近访客文件名:" + visit_file_name)
    try:
        self.sp.login_with_qr_code()
        self.output("用户" + self.sp.username + "登陆成功!")
    except BaseException:
        self.output("用户登陆失败!请检查网络连接或稍后再试!")
        raise SystemExit(1)

    # NOTE(review): this thread is not a daemon and its target loops until an
    # error occurs, so it may keep the process alive after the main thread
    # exits -- confirm that this is intended.
    visit_t = threading.Thread(target=self.sp.parse_recent_visit,
                               args=[visit_file_name, time_step])
    visit_t.start()

    self.output("正在获取最近的说说...")
    url_mood = self.sp.get_mood_url()
    url_mood = url_mood + '&uin=' + str(self.sp.username)
    self.content_list = self.get_content_list(url_mood, 0)
    self.content_list += self.get_content_list(url_mood, 20)
    self.content_list += self.get_content_list(url_mood, 40)
    self.output('------------------------')
    self.output("最近的60条说说:")
    for item in self.content_list:
        content = item['content']
        if len(content) > 20:
            content = content[0:20] + '。。。'
        self.output('|' + str(item['order']) + '|' + content)

    while True:
        try:
            self.output('------------------------')
            self.output("**以下输入请全部只输入数字!按回车键结束输入!**")
            self.output("**输入Q退出本程序**")
            is_digit = False
            while not is_digit:
                self.output('请输入您要选择的说说序号:')
                mood_order = input()
                is_digit = self.check_input(mood_order)
            mood_order = int(mood_order)

            if mood_order > len(self.content_list):
                # The mood is not among the ones already loaded: fetch the
                # page of 20 that contains it.
                pos = (math.ceil(mood_order / 20) - 1) * 20
                mood_order = mood_order % 20
                fetch_thread = threading.Thread(
                    target=self.get_all,
                    args=(url_mood, pos, mood_order),
                    daemon=True)
            else:
                unikey = self.content_list[mood_order - 1]
                fetch_thread = threading.Thread(
                    target=self.start_like_cmt_thread,
                    args=(unikey['unikey'], unikey['tid']),
                    daemon=True)
            fetch_thread.start()

            is_digit = False
            while not is_digit:
                self.output('请输入抽奖的类型,1-点赞;2-评论;(其它)-我全都要! :')
                raffle_type = input()
                is_digit = self.check_input(raffle_type)
            is_digit = False
            while not is_digit:
                self.output('请选择抽奖的用户数量:')
                user_num = input()
                is_digit = self.check_input(user_num)

            # Wait for the fetch started in this round and for the
            # like/comment workers (presumably created by that fetch --
            # confirm). Each thread is joined on its own so that a missing
            # or never-started one cannot skip the remaining joins.
            for worker in (fetch_thread,
                           getattr(self, 'like_t', None),
                           getattr(self, 'cmt_t', None)):
                if worker is None:
                    continue
                try:
                    worker.join()
                except RuntimeError:
                    # Raised when the thread was never started; there is
                    # nothing to wait for in that case.
                    pass

            self.type = int(raffle_type)
            self.user_num = int(user_num)
            # NOTE(review): after the "% 20" above this index refers to the
            # already loaded list -- confirm get_all keeps content_list in
            # sync with the fetched page.
            self.file_name = self.content_list[mood_order - 1]['content']
            if len(self.file_name) > 10:
                self.file_name = self.file_name[:10]
            # Keep only word characters and CJK ideographs so the text is
            # usable as a file name.
            self.file_name = re.sub(r'[^\w\u4e00-\u9fff]+', '',
                                    self.file_name)
            if len(self.file_name) <= 0:
                self.file_name = str(mood_order)
            print("说说:", self.file_name)
            self.do_raffle()
            cmt_df = pd.DataFrame(self.cmt_list)
            like_df = pd.DataFrame(self.like_list)
            cmt_df.to_excel("评论-" + self.file_name + '.xlsx', index=False)
            like_df.to_excel("点赞-" + self.file_name + '.xlsx', index=False)
        except BaseException as e:
            # BaseException is caught on purpose: a SystemExit carrying
            # code 2 has to be recognised here and turned into the real
            # program exit.
            if str(e) == '2':
                raise SystemExit(2)
            self.output('----------------------')
            print(e)
            self.output("未知错误,请重新尝试")
            self.output('----------------------')
def clean_friend_data(self):
    """
    Clean the friend data and export it as Excel and/or CSV.

    Loads the friend list when it is still empty, builds one record per
    entry of ``self.friend_detail`` (entries that cannot be parsed are
    stored as a placeholder "one-way friend" record), sorts the records by
    the time the friend was added and writes them to the configured files.
    The resulting DataFrame is kept in ``self.friend_df``.

    :return: True on success; False when the user has no friends or when
        cleaning fails (the error is passed to ``self.format_error``)
    """
    try:
        if len(self.friend_list) == 0:
            self.load_friend_data()
        friend_total_num = len(self.friend_list)
        print("valid friend num:", friend_total_num)
        self.friend_detail_list = []
        if friend_total_num == 0:
            print("该用户没有好友")
            return False

        # Lookup table used to find each friend's avatar by uin.
        friend_list_df = pd.DataFrame(self.friend_list)
        for friend in self.friend_detail:
            try:
                friend_uin = friend['friendUin']
                add_friend_time = friend['addFriendTime']
                img = friend_list_df.loc[
                    friend_list_df['uin'] == friend_uin, 'img'].values[0]
                nick = friend['nick']
                nick_name = remove_special_tag(nick[str(friend_uin)])
                common = friend['common']
                self.friend_detail_list.append(
                    dict(uin=self.username,
                         friend_uin=friend_uin,
                         add_friend_time=add_friend_time,
                         nick_name=nick_name,
                         common_friend_num=len(common['friend']),
                         common_group_num=len(common['group']),
                         common_group_names=common['group'],
                         img=img))
            except Exception:
                # Any missing field (or a uin absent from the friend list)
                # marks the entry as a one-way friend; keep a placeholder
                # row instead of dropping it.
                if self.debug:
                    print("单向好友:", friend)
                self.friend_detail_list.append(
                    dict(uin=0,
                         friend_uin=friend['friendUin'],
                         add_friend_time=0,
                         nick_name='单向好友',
                         common_friend_num=0,
                         common_group_num=0,
                         common_group_names='',
                         img=''))

        friend_df = pd.DataFrame(self.friend_detail_list)
        friend_df.sort_values(by='add_friend_time', inplace=True)
        # Second column with the add time converted by the util helper.
        friend_df['add_friend_time2'] = friend_df['add_friend_time'].apply(
            util.get_full_time_from_mktime)
        friend_df.fillna('', inplace=True)
        if self.export_excel:
            friend_df.to_excel(self.FRIEND_DETAIL_EXCEL_FILE_NAME)
        if self.export_csv:
            friend_df.to_csv(self.FRIEND_DETAIL_LIST_FILE_NAME)
        if self.debug:
            print("Finish to clean friend data...")
            print("File Name:", self.FRIEND_DETAIL_LIST_FILE_NAME)
        self.friend_df = friend_df
        return True
    except Exception as e:
        # Exception rather than BaseException, so Ctrl-C and interpreter
        # shutdown are not reported as a parsing failure.
        self.format_error(e, "Failed to parse friend_info")
        return False