def search_user(self, ptt_id: str, min_page: int = None, max_page: int = None) -> list:
    """Validate the arguments and delegate to ``_api_search_user.search_user``.

    Args:
        ptt_id: Value checked to be a ``str`` before the search runs.
        min_page: Optional lower page bound; index-checked when given.
        max_page: Optional upper page bound; index-checked when given.

    Returns:
        The value returned by ``_api_search_user.search_user``.

    Raises:
        exceptions.Requirelogin: When ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: When ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    check_value.check(self.config, str, 'ptt_id', ptt_id)

    has_min = min_page is not None
    has_max = max_page is not None
    if has_min:
        check_value.check_index(self.config, 'min_page', min_page)
    if has_max:
        check_value.check_index(self.config, 'max_page', max_page)
    if has_min and has_max:
        # Only a fully specified pair can be validated as a range.
        check_value.check_index_range(
            self.config, 'min_page', min_page, 'max_page', max_page)

    # Package-relative import first, plain import as the fallback.
    try:
        from . import _api_search_user as backend
    except ModuleNotFoundError:
        import _api_search_user as backend

    return backend.search_user(self, ptt_id, min_page, max_page)
def throw_waterball(self, ptt_id, content) -> None:
    """Validate the target and message, then call ``_api_waterball.throw_waterball``.

    Args:
        ptt_id: Target user ID; must be a ``str`` longer than two characters.
        content: Message text; must be a ``str``.

    Returns:
        The value returned by ``_api_waterball.throw_waterball``.

    Raises:
        exceptions.Requirelogin: When ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: When ``self._unregistered_user`` is truthy.
        ValueError: When ``ptt_id`` has two characters or fewer.
        exceptions.UserOffline: When the target's status text contains
            '不在站上'.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    check_value.check(self.config, str, 'ptt_id', ptt_id)
    check_value.check(self.config, str, 'content', content)

    if not len(ptt_id) > 2:
        message = log.merge(
            self.config, ['ptt_id', i18n.ErrorParameter, ptt_id])
        raise ValueError(message)

    # The status line of the looked-up user tells whether they are reachable.
    target_status = self._get_user(ptt_id).status
    if '不在站上' in target_status:
        raise exceptions.UserOffline(ptt_id)

    # Package-relative import first, plain import as the fallback.
    try:
        from . import _api_waterball as backend
    except ModuleNotFoundError:
        import _api_waterball as backend

    return backend.throw_waterball(self, ptt_id, content)
def mark_post(
        self,
        mark_type: int,
        board: str,
        post_aid: str = None,
        post_index: int = 0,
        search_type: int = 0,
        search_condition: str = None) -> None:
    """Mark a post by delegating to ``_api_mark_post.markPost``.

    Only the session preconditions are checked here; all arguments are
    forwarded unchanged, in order, to ``_api_mark_post.markPost``.

    Raises:
        exceptions.Requirelogin: When ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: When ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    # Package-relative import first, plain import as the fallback.
    try:
        from . import _api_mark_post as backend
    except ModuleNotFoundError:
        import _api_mark_post as backend

    backend.markPost(
        self,
        mark_type,
        board,
        post_aid,
        post_index,
        search_type,
        search_condition)
def bucket(self, board: str, bucket_days: int, reason: str, ptt_id: str) -> None:
    """Validate the arguments and delegate to ``_api_bucket.bucket``.

    Args:
        board: Board name; must be a ``str``.
        bucket_days: Must be an ``int``.
        reason: Must be a ``str``.
        ptt_id: Target user ID; must be a ``str``.

    Raises:
        exceptions.Requirelogin: When ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: When ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    # Type-check every argument in declaration order.
    for expected_type, label, value in (
            (str, 'board', board),
            (int, 'bucket_days', bucket_days),
            (str, 'reason', reason),
            (str, 'ptt_id', ptt_id)):
        check_value.check(self.config, expected_type, label, value)

    # Result discarded: presumably this raises when the user cannot be
    # found -- confirm against _get_user.
    self._get_user(ptt_id)

    self._check_board(board, check_moderator=True)

    # Package-relative import first, plain import as the fallback.
    try:
        from . import _api_bucket as backend
    except ModuleNotFoundError:
        import _api_bucket as backend

    backend.bucket(self, board, bucket_days, reason, ptt_id)
def mail(self, ptt_id: str, title: str, content: str, sign_file) -> None:
    """Validate the arguments and delegate to ``_api_mail.mail``.

    Args:
        ptt_id: Recipient ID; must be a ``str``.
        title: Mail title; must be a ``str``.
        content: Mail body; must be a ``str``.
        sign_file: Signature selector. Accepted values are 0-9 (as ``int``
            or as a one-character ``str``) or the letter ``'x'`` in either
            case, which is lower-cased before being forwarded.

    Raises:
        exceptions.Requirelogin: When ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: When ``self._unregistered_user`` is truthy.
        ValueError: When ``sign_file`` is not one of the accepted values,
            including non-string values such as ``10`` or ``None``.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    check_value.check(self.config, str, 'ptt_id', ptt_id)
    check_value.check(self.config, str, 'title', title)
    check_value.check(self.config, str, 'content', content)

    is_digit_choice = any(
        str(digit) == sign_file or digit == sign_file
        for digit in range(10))

    if not is_digit_choice:
        # Only strings can be case-folded; calling .lower() on anything else
        # would raise AttributeError instead of the intended ValueError.
        if isinstance(sign_file, str):
            sign_file = sign_file.lower()
        if sign_file != 'x':
            raise ValueError(
                log.merge(
                    self.config,
                    ['SignFile', i18n.ErrorParameter, str(sign_file)]))

    # Package-relative import first, plain import as the fallback.
    try:
        from . import _api_mail
    except ModuleNotFoundError:
        import _api_mail

    _api_mail.mail(self, ptt_id, title, content, sign_file)
def get_call_status(self) -> int:
    """Return the result of ``self._get_call_status()`` after session checks.

    Raises:
        exceptions.Requirelogin: When ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: When ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    current_status = self._get_call_status()
    return current_status
def get_user(self, user_id) -> data_type.UserInfo:
    """Return the result of ``self._get_user(user_id)`` after session checks.

    Args:
        user_id: Forwarded unchanged to ``self._get_user``.

    Raises:
        exceptions.Requirelogin: When ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: When ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    user_info = self._get_user(user_id)
    return user_info
def del_mail(self, index):
    """Validate a mail index and delegate to ``_api_mail.del_mail``.

    Args:
        index: Index of the mail to delete. It is checked against the newest
            mail index obtained from ``self.get_newest_index``.

    Returns:
        The value returned by ``_api_mail.del_mail``.

    Raises:
        exceptions.Requirelogin: When ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: When ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    current_index = self.get_newest_index(data_type.index_type.MAIL)
    # check_index takes (config, name, value, max_value=...). The previous
    # call put `index` in the name slot and validated `current_index`
    # instead, so the caller's index was never range-checked.
    check_value.check_index(
        self.config, 'index', index, max_value=current_index)

    # Package-relative import first, plain import as the fallback.
    try:
        from . import _api_mail
    except ModuleNotFoundError:
        import _api_mail

    return _api_mail.del_mail(self, index)
def give_money(self, ptt_id: str, money: int) -> None:
    """Validate the arguments and delegate to ``_api_give_money.give_money``.

    Args:
        ptt_id: Recipient ID; must be a ``str``.
        money: Amount; must be an ``int``.

    Returns:
        The value returned by ``_api_give_money.give_money``.

    Raises:
        exceptions.Requirelogin: When ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: When ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    for expected_type, label, value in (
            (str, 'ID', ptt_id),
            (int, 'Money', money)):
        check_value.check(self.config, expected_type, label, value)

    # Look the recipient up before transferring; the result is discarded.
    self.get_user(ptt_id)

    # Package-relative import first, plain import as the fallback.
    try:
        from . import _api_give_money as backend
    except ModuleNotFoundError:
        import _api_give_money as backend

    return backend.give_money(self, ptt_id, money)
def set_call_status(self, call_status) -> None:
    """Validate ``call_status`` and delegate to ``_api_call_status.set_call_status``.

    Args:
        call_status: Checked to be an ``int`` with
            ``value_class=data_type.call_status``.

    Returns:
        The value returned by ``_api_call_status.set_call_status``.

    Raises:
        exceptions.Requirelogin: When ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: When ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    check_value.check(
        self.config,
        int,
        'call_status',
        call_status,
        value_class=data_type.call_status)

    # Package-relative import first, plain import as the fallback.
    try:
        from . import _api_call_status as backend
    except ModuleNotFoundError:
        import _api_call_status as backend

    return backend.set_call_status(self, call_status)
def get_waterball(self, operate_type: int) -> list:
    """Validate ``operate_type`` and delegate to ``_api_waterball.get_waterball``.

    Args:
        operate_type: Checked to be an ``int`` with
            ``value_class=data_type.waterball_operate_type``.

    Returns:
        The value returned by ``_api_waterball.get_waterball``.

    Raises:
        exceptions.Requirelogin: When ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: When ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    check_value.check(
        self.config,
        int,
        'OperateType',
        operate_type,
        value_class=data_type.waterball_operate_type)

    # Package-relative import first, plain import as the fallback.
    try:
        from . import _api_waterball as backend
    except ModuleNotFoundError:
        import _api_waterball as backend

    return backend.get_waterball(self, operate_type)
def set_board_title(self, board: str, new_title: str) -> None:
    """Validate the arguments and delegate to ``_api_set_board_title.set_board_title``.

    Args:
        board: Board name; must be a ``str``. The board is also passed to
            ``self._check_board`` with ``check_moderator=True``.
        new_title: New title text; must be a ``str``.

    Raises:
        exceptions.Requirelogin: When ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: When ``self._unregistered_user`` is truthy.
    """
    # The first moderator-only API.
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    for label, value in (('board', board), ('new_title', new_title)):
        check_value.check(self.config, str, label, value)

    self._check_board(board, check_moderator=True)

    # Package-relative import first, plain import as the fallback.
    try:
        from . import _api_set_board_title as backend
    except ModuleNotFoundError:
        import _api_set_board_title as backend

    backend.set_board_title(self, board, new_title)
def get_newest_index(
        self,
        index_type: int,
        board: str = None,
        search_type: int = 0,
        search_condition: str = None) -> int:
    """Return ``self._get_newest_index(...)``, retrying once on any exception.

    Args:
        index_type: One of the ``data_type.index_type`` values. ``BBS`` and
            ``MAIL`` require a logged-in session; ``MAIL`` additionally
            requires a registered account.
        board: Forwarded to ``self._get_newest_index``.
        search_type: Forwarded to ``self._get_newest_index``.
        search_condition: Forwarded to ``self._get_newest_index``.

    Raises:
        exceptions.Requirelogin: For ``BBS``/``MAIL`` when not logged in.
        exceptions.UnregisteredUser: For ``MAIL`` when the account is
            unregistered.
        Exception: Whatever the second attempt raises, if it also fails.
    """
    self._one_thread()

    needs_login = index_type in (
        data_type.index_type.BBS,
        data_type.index_type.MAIL,
    )
    if needs_login and not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)

    if index_type == data_type.index_type.MAIL and self._unregistered_user:
        raise exceptions.UnregisteredUser(
            lib_util.get_current_func_name())

    self.config.log_last_value = None

    query = (index_type, board, search_type, search_condition)
    try:
        newest = self._get_newest_index(*query)
    except Exception:
        # Best-effort: one blind retry; a second failure propagates.
        newest = self._get_newest_index(*query)
    return newest
def markPost(
        api,
        mark_type: int,
        board: str,
        post_aid: str,
        post_index: int,
        search_type: int,
        search_condition: str) -> None:
    """Apply a mark operation to a post (or delete all marked posts) on a board.

    The post is selected either by ``post_aid`` or by ``post_index``
    (optionally within a search defined by ``search_type`` and
    ``search_condition``). Selection arguments are not validated when
    ``mark_type`` is ``data_type.mark_type.DeleteD``.

    Args:
        api: Object providing ``config``, ``connect_core``,
            ``_get_newest_index`` and ``_check_board``.
        mark_type: A ``data_type.mark_type`` value.
        board: Non-empty board name.
        post_aid: Post AID, or ``None`` when selecting by index.
        post_index: Post index, or ``0`` when selecting by AID.
        search_type: A ``data_type.post_search_type`` value (``0`` for none).
        search_condition: Search text, or ``None``.

    Raises:
        ValueError: When the board is empty or the selection arguments are
            inconsistent (both or neither of index/AID, a condition without
            a search type, a PUSH condition that is not an integer in
            [-100, 110], or an AID combined with a search condition).
        exceptions.HostNotSupport: For ``UNCONFIRMED`` on the PTT2 host.
    """
    log.show_value(
        api.config,
        log.level.INFO,
        [i18n.PTT, i18n.Msg],
        i18n.MarkPost)

    check_value.check(
        api.config, int, 'mark_type', mark_type,
        value_class=data_type.mark_type)
    check_value.check(api.config, str, 'Board', board)
    if post_aid is not None:
        check_value.check(api.config, str, 'PostAID', post_aid)
    check_value.check(api.config, int, 'PostIndex', post_index)
    check_value.check(
        api.config, int, 'SearchType', search_type,
        value_class=data_type.post_search_type)
    if search_condition is not None:
        check_value.check(
            api.config, str, 'SearchCondition', search_condition)

    if not board:
        # log.merge takes the config as its first argument, like every other
        # call here; without it this path failed inside log.merge instead of
        # raising the intended ValueError.
        raise ValueError(
            log.merge(api.config, [i18n.Board, i18n.ErrorParameter, board]))

    if mark_type != data_type.mark_type.DeleteD:
        if post_index != 0 and isinstance(post_aid, str):
            raise ValueError(
                log.merge(api.config, [
                    'PostIndex',
                    'PostAID',
                    i18n.ErrorParameter,
                    i18n.BothInput,
                ]))

        if post_index == 0 and post_aid is None:
            raise ValueError(
                log.merge(
                    api.config,
                    ['PostIndex', 'PostAID', i18n.ErrorParameter]))

        if search_condition is not None and search_type == 0:
            raise ValueError(
                log.merge(api.config, [
                    'SearchType',
                    i18n.ErrorParameter,
                ]))

        if search_type == data_type.post_search_type.PUSH:
            try:
                push_threshold = int(search_condition)
            except (ValueError, TypeError):
                # TypeError covers search_condition=None, which int() rejects
                # with a different exception than a malformed string.
                raise ValueError(
                    log.merge(api.config, [
                        'SearchCondition',
                        i18n.ErrorParameter,
                    ]))

            if not (-100 <= push_threshold <= 110):
                raise ValueError(
                    log.merge(api.config, [
                        'SearchCondition',
                        i18n.ErrorParameter,
                    ]))

        if post_aid is not None and search_condition is not None:
            raise ValueError(
                log.merge(api.config, [
                    'PostAID',
                    'SearchCondition',
                    i18n.ErrorParameter,
                    i18n.BothInput,
                ]))

        if post_index != 0:
            newest_index = api._get_newest_index(
                data_type.index_type.BBS,
                board=board,
                search_type=search_type,
                search_condition=search_condition)
            check_value.check_index(
                api.config, 'PostIndex', post_index,
                max_value=newest_index)

    if mark_type == data_type.mark_type.UNCONFIRMED:
        # PTT2 has no "unconfirmed post" feature.
        if api.config.host == data_type.host_type.PTT2:
            raise exceptions.HostNotSupport(lib_util.get_current_func_name())

    api._check_board(board, check_moderator=True)

    # Step 1: enter the board.
    cmd_list = []
    cmd_list.append(command.GoMainMenu)
    cmd_list.append('qs')
    cmd_list.append(board)
    cmd_list.append(command.Enter)

    cmd = ''.join(cmd_list)

    target_list = [
        connect_core.TargetUnit(
            i18n.AnyKeyContinue,
            '任意鍵',
            response=' ',
        ),
        connect_core.TargetUnit(
            [
                '動畫播放中',
            ],
            '互動式動畫播放中',
            response=command.Ctrl_C,
            log_level=log.level.DEBUG),
        connect_core.TargetUnit(
            [
                '進板成功',
            ],
            screens.Target.InBoard,
            break_detect=True,
            log_level=log.level.DEBUG),
    ]

    api.connect_core.send(cmd, target_list)

    # Step 2: move the cursor to the post (if any) and send the mark key.
    cmd_list = []
    if post_aid is not None:
        cmd_list.append('#' + post_aid)
        cmd_list.append(command.Enter)
    elif post_index != 0:
        if search_condition is not None:
            if search_type == data_type.post_search_type.KEYWORD:
                cmd_list.append('/')
            elif search_type == data_type.post_search_type.AUTHOR:
                cmd_list.append('a')
            elif search_type == data_type.post_search_type.PUSH:
                cmd_list.append('Z')
            elif search_type == data_type.post_search_type.MARK:
                cmd_list.append('G')
            elif search_type == data_type.post_search_type.MONEY:
                cmd_list.append('A')

            cmd_list.append(search_condition)
            cmd_list.append(command.Enter)

        cmd_list.append(str(post_index))
        cmd_list.append(command.Enter)

    if mark_type == data_type.mark_type.S:
        cmd_list.append('L')
    elif mark_type == data_type.mark_type.D:
        cmd_list.append('t')
    elif mark_type == data_type.mark_type.DeleteD:
        cmd_list.append(command.Ctrl_D)
    elif mark_type == data_type.mark_type.M:
        cmd_list.append('m')
    elif mark_type == data_type.mark_type.UNCONFIRMED:
        cmd_list.append(command.Ctrl_E + 'S')

    cmd = ''.join(cmd_list)

    target_list = [
        connect_core.TargetUnit(
            [i18n.DelAllMarkPost],
            '刪除所有標記',
            response='y' + command.Enter,
            log_level=log.level.INFO),
        connect_core.TargetUnit(
            [
                i18n.Mark,
                i18n.Success,
            ],
            screens.Target.InBoard,
            break_detect=True,
            log_level=log.level.INFO),
    ]

    api.connect_core.send(cmd, target_list)
def crawl_board(
        self,
        crawl_type: int,
        post_handler,
        board: str,
        # BBS version
        start_index: int = 0,
        end_index: int = 0,
        start_aid: str = None,
        end_aid: str = None,
        search_type: int = 0,
        search_condition: str = None,
        query: bool = False,
        # Web version
        start_page: int = 0,
        end_page: int = 0) -> tuple:
    """Crawl a range of posts on a board and pass each one to ``post_handler``.

    With ``crawl_type == data_type.crawl_type.BBS`` the posts are fetched
    one by one through ``self._get_post`` (login required); the range is
    given either by ``start_index``/``end_index`` or by
    ``start_aid``/``end_aid``. Any other crawl type uses the web listing
    pages ``start_page`` .. ``end_page`` of https://www.ptt.cc/bbs/.

    Args:
        crawl_type: A ``data_type.crawl_type`` value.
        post_handler: Callable invoked with every successfully built post.
        board: Non-empty board name.
        start_index: First post index (BBS mode); ``0`` means "not given".
        end_index: Last post index, inclusive (BBS mode).
        start_aid: AID of the first post (BBS mode), alternative to indexes.
        end_aid: AID of the last post (BBS mode), alternative to indexes.
        search_type: A ``data_type.post_search_type`` value (BBS mode).
        search_condition: Search text (BBS mode), or ``None``.
        query: Forwarded to ``self._get_post`` (BBS mode).
        start_page: First listing page (web mode).
        end_page: Last listing page, inclusive (web mode).

    Returns:
        A tuple ``(error_post_list, del_post_list)``. In BBS mode the first
        holds the index (or AID, when known) of posts that could not be
        fetched or failed the format check, and the second the indexes of
        deleted posts. In web mode the first is always empty and the second
        holds the titles of entries that look deleted.

    Raises:
        ValueError: On an empty board or inconsistent range arguments.
        exceptions.Requirelogin: In BBS mode when not logged in.
        exceptions.HostNotSupport: In web mode on the PTT2 host.
        exceptions.NoSuchBoard: In web mode when a listing page request
            does not return an OK status.
    """
    self._one_thread()

    self.config.log_last_value = None

    check_value.check(
        self.config, int, 'crawl_type', crawl_type,
        value_class=data_type.crawl_type)
    check_value.check(self.config, str, 'Board', board)

    if len(board) == 0:
        raise ValueError(
            log.merge(self.config, [i18n.Board, i18n.ErrorParameter, board]))

    show_progress = self.config.log_level == log.level.INFO

    if crawl_type == data_type.crawl_type.BBS:
        if not self._login_status:
            raise exceptions.Requirelogin(i18n.Requirelogin)

        check_value.check(self.config, int, 'SearchType', search_type)
        if search_condition is not None:
            check_value.check(
                self.config, str, 'SearchCondition', search_condition)
        if start_aid is not None:
            check_value.check(self.config, str, 'StartAID', start_aid)
        if end_aid is not None:
            check_value.check(self.config, str, 'EndAID', end_aid)

        aid_given = start_aid is not None or end_aid is not None

        if aid_given and (start_index != 0 or end_index != 0):
            raise ValueError(
                log.merge(
                    self.config,
                    ['AID', 'Index', i18n.ErrorParameter, i18n.BothInput]))

        if aid_given and search_condition is not None:
            raise ValueError(
                log.merge(self.config, [
                    'AID',
                    'SearchCondition',
                    i18n.ErrorParameter,
                    i18n.BothInput,
                ]))

        if search_type == data_type.post_search_type.PUSH:
            try:
                push_threshold = int(search_condition)
            except (ValueError, TypeError):
                # TypeError covers search_condition=None, which int() rejects
                # with a different exception than a malformed string.
                raise ValueError(
                    log.merge(self.config, [
                        'SearchCondition',
                        i18n.ErrorParameter,
                    ]))

            if not (-100 <= push_threshold <= 110):
                raise ValueError(
                    log.merge(self.config, [
                        'SearchCondition',
                        i18n.ErrorParameter,
                    ]))

        if start_index != 0:
            newest_index = self._get_newest_index(
                data_type.index_type.BBS,
                board=board,
                search_type=search_type,
                search_condition=search_condition)

            check_value.check_index_range(
                self.config,
                'start_index', start_index,
                'end_index', end_index,
                max_value=newest_index)
        elif start_aid is not None and end_aid is not None:
            # Translate the AID pair into an index range.
            start_index = self.get_post(
                board, post_aid=start_aid, query=True).index
            end_index = self.get_post(
                board, post_aid=end_aid, query=True).index

            check_value.check_index_range(
                self.config,
                'start_index', start_index,
                'end_index', end_index)
        else:
            raise ValueError(
                log.merge(self.config, [i18n.ErrorParameter, i18n.NoInput]))

        log.show_value(
            self.config, log.level.DEBUG, 'StartIndex', start_index)
        log.show_value(
            self.config, log.level.DEBUG, 'EndIndex', end_index)

        error_post_list = []
        del_post_list = []

        if show_progress:
            progress = progressbar.ProgressBar(
                max_value=end_index - start_index + 1,
                redirect_stdout=True)

        for index in range(start_index, end_index + 1):

            # Each post gets two attempts; errors on the second propagate.
            for attempt in range(2):
                need_continue = False
                post = None
                try:
                    post = self._get_post(
                        board,
                        post_index=index,
                        search_type=search_type,
                        search_condition=search_condition,
                        query=query)
                except (exceptions.ParseError,
                        exceptions.UnknownError,
                        exceptions.NoSuchBoard,
                        exceptions.NoMatchTargetError):
                    if attempt == 1:
                        raise
                    need_continue = True
                except (exceptions.ConnectionClosed,
                        exceptions.UseTooManyResources):
                    if attempt == 1:
                        raise
                    # The session is gone; log in again before retrying.
                    log.log(
                        self.config, log.level.INFO,
                        i18n.RestoreConnection)
                    self._login(
                        self._ID, self._Password,
                        self.config.kick_other_login)
                    need_continue = True

                if post is None or not post.pass_format_check:
                    need_continue = True

                if need_continue:
                    log.log(
                        self.config, log.level.DEBUG,
                        'Wait for retry repost')
                    time.sleep(0.1)
                    continue

                break

            if show_progress:
                progress.update(index - start_index)

            if post is None:
                error_post_list.append(index)
                continue

            if not post.pass_format_check:
                # Prefer the AID as the identifier when it was parsed.
                if post.aid is not None:
                    error_post_list.append(post.aid)
                else:
                    error_post_list.append(index)
                continue

            if post.delete_status != data_type.post_delete_status.NOT_DELETED:
                del_post_list.append(index)
            post_handler(post)

        if show_progress:
            progress.finish()

        return error_post_list, del_post_list

    if self.config.host == data_type.host_type.PTT2:
        raise exceptions.HostNotSupport(lib_util.get_current_func_name())

    # Web-version crawler
    # https://www.ptt.cc/bbs/index.html

    # 1. Get the total number of pages (MaxPage).
    newest_index = self._get_newest_index(
        data_type.index_type.WEB, board=board)

    # 2. Check that StartPage and EndPage lie within 1 ~ MaxPage.
    check_value.check_index_range(
        self.config,
        'StartPage', start_page,
        'EndPage', end_page,
        max_value=newest_index)

    # 3. Parse the fields of every post (deleted ones included) and build
    #    a data_type.PostInfo from them.
    error_post_list = []
    del_post_list = []
    _url = 'https://www.ptt.cc/bbs/'

    if show_progress:
        progress = progressbar.ProgressBar(
            max_value=end_page - start_page + 1,
            redirect_stdout=True)

    def deleted_post(post_title):
        """Classify a listing title into a ``post_delete_status`` value."""
        if post_title.startswith('('):
            if '本文' in post_title:
                return data_type.post_delete_status.AUTHOR
            elif post_title.startswith('(已被'):
                return data_type.post_delete_status.MODERATOR
            else:
                return data_type.post_delete_status.UNKNOWN
        else:
            return data_type.post_delete_status.NOT_DELETED

    def text_between(text, opener, closer):
        """Return the text after the first ``opener`` up to ``closer``, or None."""
        pieces = text.split(opener)
        if len(pieces) < 2:
            return None
        return pieces[1].split(closer)[0]

    # Stop at end_page: it is the bound that was validated above and that the
    # progress bar is sized for. Running on to newest_index ignored the
    # caller's range and pushed the progress bar past its max_value.
    for index in range(start_page, end_page + 1):
        log.show_value(self.config, log.level.DEBUG, 'CurrentPage', index)

        url = _url + board + '/index' + str(index) + '.html'
        r = requests.get(url, cookies={'over18': '1'})
        if r.status_code != requests.codes.ok:
            raise exceptions.NoSuchBoard(self.config, board)
        soup = BeautifulSoup(r.text, 'html.parser')

        for div in soup.select('div.r-ent'):
            web = div.select('div.title a')
            author = div.select('div.author')[0].text
            title = div.select('div.title')[0].text.strip('\n').strip()
            href = web[0].get('href') if web else ''

            if title.startswith('('):
                del_post_list.append(title)
                # Deleted entries carry the author inside the title, in
                # [...] or <...>. Keep the listed author when no such
                # marker is present rather than failing with IndexError.
                if title.startswith('(本文') and '[' in title:
                    recovered = text_between(title, '[', ']')
                else:
                    recovered = text_between(title, '<', '>')
                if recovered is not None:
                    author = recovered

            post = data_type.PostInfo(
                board=board,
                author=author,
                title=title,
                web_url='https://www.ptt.cc' + href,
                delete_status=deleted_post(title))
            # 4. Hand the assembled post to the handler.
            post_handler(post)

        # 5. Update the progress bar.
        if show_progress:
            progress.update(index - start_page)

    log.show_value(
        self.config, log.level.DEBUG, 'DelPostList', del_post_list)

    if show_progress:
        progress.finish()

    return error_post_list, del_post_list