def search_user(self, ptt_id: str, min_page: int = None, max_page: int = None) -> list:
    """Validate the arguments and delegate to ``_api_search_user.search_user``.

    Args:
        ptt_id: value checked to be a ``str`` and forwarded as the search key.
        min_page: optional lower page bound; index-checked when given.
        max_page: optional upper page bound; index-checked when given.

    Returns:
        Whatever ``_api_search_user.search_user`` returns.

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    check_value.check(self.config, str, 'ptt_id', ptt_id)

    has_min = min_page is not None
    has_max = max_page is not None
    if has_min:
        check_value.check_index(self.config, 'min_page', min_page)
    if has_max:
        check_value.check_index(self.config, 'max_page', max_page)
    # The range check only makes sense when both bounds were supplied.
    if has_min and has_max:
        check_value.check_index_range(
            self.config, 'min_page', min_page, 'max_page', max_page)

    try:
        from . import _api_search_user as api
    except ModuleNotFoundError:
        import _api_search_user as api

    return api.search_user(self, ptt_id, min_page, max_page)
def mail(self, ptt_id: str, title: str, content: str, sign_file) -> None:
    """Validate the arguments and send a mail through ``_api_mail.mail``.

    Args:
        ptt_id: recipient ID; must be a ``str``.
        title: mail title; must be a ``str``.
        content: mail body; must be a ``str``.
        sign_file: signature-file selector. Accepted values are the digits
            0-9 (as ``int`` or one-character ``str``) or the letter ``x`` in
            either case; a letter is lower-cased before being forwarded.

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: ``self._unregistered_user`` is truthy.
        ValueError: ``sign_file`` is not one of the accepted values.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    check_value.check(self.config, str, 'ptt_id', ptt_id)
    check_value.check(self.config, str, 'title', title)
    check_value.check(self.config, str, 'content', content)

    is_digit_choice = any(
        str(digit) == sign_file or digit == sign_file for digit in range(10))

    if not is_digit_choice:
        # Only strings can be lower-cased; any other leftover type (None,
        # out-of-range ints, ...) must be rejected with ValueError rather
        # than crash with AttributeError on ``.lower()``.
        if isinstance(sign_file, str):
            sign_file = sign_file.lower()
        if sign_file != 'x':
            raise ValueError(
                log.merge(self.config,
                          ['SignFile', i18n.ErrorParameter, str(sign_file)]))

    try:
        from . import _api_mail
    except ModuleNotFoundError:
        import _api_mail

    _api_mail.mail(self, ptt_id, title, content, sign_file)
def bucket(self, board: str, bucket_days: int, reason: str, ptt_id: str) -> None:
    """Validate the arguments and delegate to ``_api_bucket.bucket``.

    Args:
        board: board name; must be a ``str``.
        bucket_days: must be an ``int``.
        reason: must be a ``str``.
        ptt_id: target user ID; must be a ``str``.

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    # (expected type, name used in error messages, value) in original order.
    typed_arguments = (
        (str, 'board', board),
        (int, 'bucket_days', bucket_days),
        (str, 'reason', reason),
        (str, 'ptt_id', ptt_id),
    )
    for expected_type, arg_name, arg_value in typed_arguments:
        check_value.check(self.config, expected_type, arg_name, arg_value)

    # Look the user up first, then require moderator rights on the board.
    self._get_user(ptt_id)
    self._check_board(board, check_moderator=True)

    try:
        from . import _api_bucket as api
    except ModuleNotFoundError:
        import _api_bucket as api

    api.bucket(self, board, bucket_days, reason, ptt_id)
def mark_post(self, mark_type: int, board: str, post_aid: str = None, post_index: int = 0, search_type: int = 0, search_condition: str = None) -> None:
    """Mark a post by delegating to ``_api_mark_post.markPost``.

    No argument validation happens here; all arguments are forwarded
    unchanged to the API module.

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    try:
        from . import _api_mark_post as api
    except ModuleNotFoundError:
        import _api_mark_post as api

    api.markPost(
        self,
        mark_type,
        board,
        post_aid,
        post_index,
        search_type,
        search_condition)
def throw_waterball(self, ptt_id, content) -> None:
    """Validate the target and delegate to ``_api_waterball.throw_waterball``.

    Args:
        ptt_id: target user ID; must be a ``str`` longer than two characters.
        content: message text; must be a ``str``.

    Returns:
        Whatever ``_api_waterball.throw_waterball`` returns.

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: ``self._unregistered_user`` is truthy.
        ValueError: ``ptt_id`` has two characters or fewer.
        exceptions.UserOffline: the user's status text contains the marker
            string checked below.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    for arg_name, arg_value in (('ptt_id', ptt_id), ('content', content)):
        check_value.check(self.config, str, arg_name, arg_value)

    if len(ptt_id) <= 2:
        raise ValueError(
            log.merge(self.config, ['ptt_id', i18n.ErrorParameter, ptt_id]))

    target = self._get_user(ptt_id)
    # NOTE(review): the literal presumably means "not online" -- confirm.
    if '不在站上' in target.status:
        raise exceptions.UserOffline(ptt_id)

    try:
        from . import _api_waterball as api
    except ModuleNotFoundError:
        import _api_waterball as api

    return api.throw_waterball(self, ptt_id, content)
def reply_post(self, reply_type: int, board: str, content: str, sign_file=0, post_aid: str = None, post_index: int = 0) -> None:
    """Validate the arguments and delegate to ``_api_reply_post.reply_post``.

    Args:
        reply_type: ``int`` checked against ``data_type.reply_type``.
        board: board name; must be a ``str``.
        content: reply text; must be a ``str``.
        sign_file: signature-file selector; ``str(sign_file)`` must be one
            of ``'0'``..``'9'`` or ``'x'``.
        post_aid: optional post AID (``str``).
        post_index: optional post index; ``0`` means "not given".

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
        ValueError: bad ``sign_file``, or both ``post_aid`` and
            ``post_index`` were supplied.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)

    self.config.log_last_value = None

    check_value.check(self.config, int, 'reply_type', reply_type,
                      value_class=data_type.reply_type)
    check_value.check(self.config, str, 'Board', board)
    check_value.check(self.config, str, 'Content', content)
    if post_aid is not None:
        check_value.check(self.config, str, 'PostAID', post_aid)

    if post_index != 0:
        # An explicit index must not exceed the board's newest index.
        board_newest = self._get_newest_index(data_type.index_type.BBS,
                                              board=board)
        check_value.check_index(self.config, 'PostIndex', post_index,
                                max_value=board_newest)

    allowed_sign_files = [str(digit) for digit in range(10)] + ['x']
    if str(sign_file) not in allowed_sign_files:
        raise ValueError(
            log.merge(self.config, ['SignFile', i18n.ErrorParameter]))

    if post_aid is not None and post_index != 0:
        raise ValueError(
            log.merge(self.config, [
                'PostIndex',
                'PostAID',
                i18n.ErrorParameter,
                i18n.BothInput
            ]))

    self._check_board(board)

    try:
        from . import _api_reply_post as api
    except ModuleNotFoundError:
        import _api_reply_post as api

    api.reply_post(self, reply_type, board, content, sign_file, post_aid,
                   post_index)
def get_user(self, user_id) -> data_type.UserInfo:
    """Public wrapper around ``self._get_user`` with login checks.

    Args:
        user_id: forwarded unchanged to ``self._get_user``.

    Returns:
        The value returned by ``self._get_user(user_id)``.

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    user_info = self._get_user(user_id)
    return user_info
def get_call_status(self) -> int:
    """Public wrapper around ``self._get_call_status`` with login checks.

    Returns:
        The value returned by ``self._get_call_status()``.

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    current_status = self._get_call_status()
    return current_status
def get_board_info(self, board: str) -> data_type.BoardInfo:
    """Public wrapper around ``self._get_board_info`` with a login check.

    Args:
        board: board name; must be a ``str``.

    Returns:
        The value returned by
        ``self._get_board_info(board, call_by_others=False)``.

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)

    self.config.log_last_value = None

    check_value.check(self.config, str, 'board', board)

    board_info = self._get_board_info(board, call_by_others=False)
    return board_info
def get_time(self) -> str:
    """Delegate to ``_api_get_time.get_time`` after the login check.

    Returns:
        Whatever ``_api_get_time.get_time(self)`` returns.

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)

    self.config.log_last_value = None

    try:
        from . import _api_get_time as api
    except ModuleNotFoundError:
        import _api_get_time as api

    return api.get_time(self)
def get_favourite_board(self) -> list:
    """Delegate to ``_api_get_favourite_board.get_favourite_board``.

    Returns:
        Whatever the API module's ``get_favourite_board(self)`` returns.

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)

    self.config.log_last_value = None

    try:
        from . import _api_get_favourite_board as api
    except ModuleNotFoundError:
        import _api_get_favourite_board as api

    return api.get_favourite_board(self)
def has_new_mail(self) -> int:
    """Delegate to ``_api_has_new_mail.has_new_mail`` after the login check.

    NOTE(review): a second ``has_new_mail`` definition follows this one in
    the file; if both live in the same scope the later one replaces this.

    Returns:
        Whatever ``_api_has_new_mail.has_new_mail(self)`` returns.

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)

    self.config.log_last_value = None

    try:
        from . import _api_has_new_mail as api
    except ModuleNotFoundError:
        import _api_has_new_mail as api

    return api.has_new_mail(self)
def has_new_mail(self) -> int:
    """Return ``0`` for an empty mailbox, else ask ``_api_has_new_mail``.

    Returns:
        ``0`` when ``self.get_newest_index(MAIL)`` is ``0``; otherwise
        whatever ``_api_has_new_mail.has_new_mail(self)`` returns.

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)

    # Shortcut: with no mail at all there is nothing to query.
    newest_mail_index = self.get_newest_index(data_type.index_type.MAIL)
    if newest_mail_index == 0:
        return 0

    self.config.log_last_value = None

    try:
        from . import _api_has_new_mail as api
    except ModuleNotFoundError:
        import _api_has_new_mail as api

    return api.has_new_mail(self)
def get_newest_index(self, index_type: int, board: str = None, search_type: int = 0, search_condition: str = None) -> int:
    """Public wrapper around ``self._get_newest_index`` with one retry.

    NOTE(review): a second ``get_newest_index`` definition appears later in
    the file; if both live in the same scope the later one replaces this.

    Args:
        index_type: a ``data_type.index_type`` value; login is only
            required for ``BBS``.
        board, search_type, search_condition: forwarded unchanged.

    Returns:
        The value returned by ``self._get_newest_index``.

    Raises:
        exceptions.Requirelogin: ``index_type`` is ``BBS`` and
            ``self._login_status`` is falsy.
    """
    self._one_thread()

    needs_login = index_type == data_type.index_type.BBS
    if needs_login:
        if not self._login_status:
            raise exceptions.Requirelogin(i18n.Requirelogin)

    self.config.log_last_value = None

    lookup_args = (index_type, board, search_type, search_condition)
    try:
        return self._get_newest_index(*lookup_args)
    except Exception:
        # Any failure gets exactly one more attempt; a second failure
        # propagates to the caller.
        return self._get_newest_index(*lookup_args)
def del_mail(self, index):
    """Validate ``index`` and delegate to ``_api_mail.del_mail``.

    Args:
        index: mail index to delete; validated against the newest mail
            index returned by ``self.get_newest_index(MAIL)``.

    Returns:
        Whatever ``_api_mail.del_mail(self, index)`` returns.

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    current_index = self.get_newest_index(data_type.index_type.MAIL)
    # Every other check_index call in this file passes (config, name,
    # value[, max_value]). The original call omitted the name, so it
    # validated current_index instead of index and applied no upper bound.
    check_value.check_index(self.config, 'index', index,
                            max_value=current_index)

    try:
        from . import _api_mail
    except ModuleNotFoundError:
        import _api_mail

    return _api_mail.del_mail(self, index)
def give_money(self, ptt_id: str, money: int) -> None:
    """Validate the arguments and delegate to ``_api_give_money.give_money``.

    Args:
        ptt_id: recipient ID; must be a ``str``.
        money: amount; must be an ``int``.

    Returns:
        Whatever ``_api_give_money.give_money`` returns.

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    for expected_type, arg_name, arg_value in (
            (str, 'ID', ptt_id),
            (int, 'Money', money)):
        check_value.check(self.config, expected_type, arg_name, arg_value)

    # Looking the user up is the existence check; the result is not used.
    self.get_user(ptt_id)

    try:
        from . import _api_give_money as api
    except ModuleNotFoundError:
        import _api_give_money as api

    return api.give_money(self, ptt_id, money)
def set_call_status(self, call_status) -> None:
    """Validate ``call_status`` and delegate to ``_api_call_status``.

    Args:
        call_status: ``int`` checked against ``data_type.call_status``.

    Returns:
        Whatever ``_api_call_status.set_call_status`` returns.

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    check_value.check(
        self.config,
        int,
        'call_status',
        call_status,
        value_class=data_type.call_status)

    try:
        from . import _api_call_status as api
    except ModuleNotFoundError:
        import _api_call_status as api

    return api.set_call_status(self, call_status)
def get_waterball(self, operate_type: int) -> list:
    """Validate ``operate_type`` and delegate to ``_api_waterball``.

    Args:
        operate_type: ``int`` checked against
            ``data_type.waterball_operate_type``.

    Returns:
        Whatever ``_api_waterball.get_waterball`` returns.

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    check_value.check(
        self.config,
        int,
        'OperateType',
        operate_type,
        value_class=data_type.waterball_operate_type)

    try:
        from . import _api_waterball as api
    except ModuleNotFoundError:
        import _api_waterball as api

    return api.get_waterball(self, operate_type)
def set_board_title(self, board: str, new_title: str) -> None:
    """Validate the arguments and delegate to ``_api_set_board_title``.

    This is the first moderator-only API: the board check is made with
    ``check_moderator=True``.

    Args:
        board: board name; must be a ``str``.
        new_title: must be a ``str``.

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)
    if self._unregistered_user:
        raise exceptions.UnregisteredUser(lib_util.get_current_func_name())

    self.config.log_last_value = None

    for arg_name, arg_value in (('board', board), ('new_title', new_title)):
        check_value.check(self.config, str, arg_name, arg_value)

    self._check_board(board, check_moderator=True)

    try:
        from . import _api_set_board_title as api
    except ModuleNotFoundError:
        import _api_set_board_title as api

    api.set_board_title(self, board, new_title)
def get_newest_index(self, index_type: int, board: str = None, search_type: int = 0, search_condition: str = None) -> int:
    """Public wrapper around ``self._get_newest_index`` with one retry.

    Args:
        index_type: a ``data_type.index_type`` value. ``BBS`` and ``MAIL``
            require login; ``MAIL`` additionally requires a registered
            account.
        board, search_type, search_condition: forwarded unchanged.

    Returns:
        The value returned by ``self._get_newest_index``.

    Raises:
        exceptions.Requirelogin: login is required and
            ``self._login_status`` is falsy.
        exceptions.UnregisteredUser: ``index_type`` is ``MAIL`` and
            ``self._unregistered_user`` is truthy.
    """
    self._one_thread()

    if (index_type == data_type.index_type.BBS
            or index_type == data_type.index_type.MAIL):
        if not self._login_status:
            raise exceptions.Requirelogin(i18n.Requirelogin)

    if index_type == data_type.index_type.MAIL:
        if self._unregistered_user:
            raise exceptions.UnregisteredUser(
                lib_util.get_current_func_name())

    self.config.log_last_value = None

    lookup_args = (index_type, board, search_type, search_condition)
    try:
        return self._get_newest_index(*lookup_args)
    except Exception:
        # Any failure gets exactly one more attempt; a second failure
        # propagates to the caller.
        return self._get_newest_index(*lookup_args)
def post(self, board: str, title: str, content: str, post_type: int, sign_file) -> None:
    """Validate the arguments and publish a post through ``_api_post.post``.

    Args:
        board: board name; must be a ``str``.
        title: post title; must be a ``str``.
        content: post body; must be a ``str``.
        post_type: must be an ``int``.
        sign_file: signature-file selector. Accepted values are the digits
            0-9 (as ``int`` or one-character ``str``) or the letter ``x`` in
            either case; a letter is lower-cased before being forwarded.

    Returns:
        Whatever ``_api_post.post`` returns (the annotation says ``None``,
        but the API result is passed through).

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
        ValueError: ``sign_file`` is not one of the accepted values.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)

    self.config.log_last_value = None

    check_value.check(self.config, str, 'Board', board)
    check_value.check(self.config, str, 'Title', title)
    check_value.check(self.config, str, 'Content', content)
    check_value.check(self.config, int, 'PostType', post_type)

    is_digit_choice = any(
        str(digit) == sign_file or digit == sign_file for digit in range(10))

    if not is_digit_choice:
        # Only strings can be lower-cased; any other leftover type (None,
        # out-of-range ints, ...) must be rejected with ValueError rather
        # than crash with AttributeError on ``.lower()``.
        if isinstance(sign_file, str):
            sign_file = sign_file.lower()
        if sign_file != 'x':
            raise ValueError(
                log.merge(self.config,
                          ['SignFile', i18n.ErrorParameter, str(sign_file)]))

    self._check_board(board)

    try:
        from . import _api_post
    except ModuleNotFoundError:
        import _api_post

    return _api_post.post(self, board, title, content, post_type, sign_file)
def push(self, board: str, push_type: int, push_content: str, post_aid: str = None, post_index: int = 0) -> None:
    """Validate the arguments, split the comment into chunks and push each.

    ``push_content`` is cut into chunks; a chunk ends when its
    ``big5-uao`` encoded length is no longer below 33 bytes, when it ends
    with a newline, or when the content is exhausted. Each non-empty,
    stripped chunk is sent with ``self._push``.

    Args:
        board: board name; must be a non-empty ``str``.
        push_type: ``int`` checked against ``data_type.push_type``.
        push_content: comment text; must be a ``str``.
        post_aid: post AID (``str``); exclusive with ``post_index``.
        post_index: post index; ``0`` means "not given".

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
        ValueError: empty board, or both / neither of ``post_aid`` and
            ``post_index`` were supplied.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)

    self.config.log_last_value = None

    check_value.check(self.config, str, 'Board', board)
    check_value.check(self.config, int, 'push_type', push_type,
                      value_class=data_type.push_type)
    check_value.check(self.config, str, 'PushContent', push_content)
    if post_aid is not None:
        check_value.check(self.config, str, 'PostAID', post_aid)
    check_value.check(self.config, int, 'PostIndex', post_index)

    if len(board) == 0:
        raise ValueError(
            log.merge(self.config, [i18n.Board, i18n.ErrorParameter, board]))

    if post_index != 0 and isinstance(post_aid, str):
        raise ValueError(
            log.merge(self.config, [
                'PostIndex',
                'PostAID',
                i18n.ErrorParameter,
                i18n.BothInput
            ]))

    if post_index == 0 and post_aid is None:
        raise ValueError(
            log.merge(self.config, [
                'PostIndex',
                'PostAID',
                i18n.ErrorParameter,
                i18n.NoInput
            ]))

    if post_index != 0:
        newest_index = self._get_newest_index(data_type.index_type.BBS,
                                              board=board)
        check_value.check_index(self.config, 'PostIndex', post_index,
                                newest_index)

    self._check_board(board)

    max_push_length = 33
    content_length = len(push_content)
    push_list = []

    temp_start_index = 0
    temp_end_index = temp_start_index + 1

    while temp_end_index <= content_length:
        temp = ''
        last_temp = None
        # Grow the chunk one character at a time.
        while len(temp.encode('big5-uao', 'replace')) < max_push_length:
            temp = push_content[temp_start_index:temp_end_index]

            if not len(temp.encode('big5-uao', 'replace')) < max_push_length:
                break
            elif temp_end_index >= content_length:
                # End of content reached. Compare indices rather than using
                # push_content.endswith(temp): a chunk that merely equals a
                # suffix (e.g. "a" inside "aaaa") would otherwise be cut
                # early and sent as many tiny pushes.
                break
            elif temp.endswith('\n'):
                break
            elif last_temp == temp:
                # Defensive: the slice stopped growing.
                break

            temp_end_index += 1
            last_temp = temp

        push_list.append(temp.strip())

        temp_start_index = temp_end_index
        temp_end_index = temp_start_index + 1

    # Drop chunks that became empty after stripping.
    push_list = [chunk for chunk in push_list if chunk]

    for chunk in push_list:
        log.show_value(self.config, log.level.INFO, i18n.Push, chunk)

        # Two attempts per chunk; after a second NoFastPush the chunk is
        # skipped without raising.
        for _ in range(2):
            try:
                self._push(board, push_type, chunk, post_aid=post_aid,
                           post_index=post_index)
                break
            except exceptions.NoFastPush:
                log.log(self.config, log.level.INFO, '等待快速推文')
                time.sleep(5.2)
def crawl_board(
        self,
        crawl_type: int,
        post_handler,
        board: str,
        # BBS version
        start_index: int = 0,
        end_index: int = 0,
        start_aid: str = None,
        end_aid: str = None,
        search_type: int = 0,
        search_condition: str = None,
        query: bool = False,
        # Web version
        start_page: int = 0,
        end_page: int = 0) -> list:
    """Crawl a range of posts and hand each one to ``post_handler``.

    Two modes are selected by ``crawl_type``:

    * ``data_type.crawl_type.BBS``: fetch posts ``start_index..end_index``
      (or the range resolved from ``start_aid``/``end_aid``) through
      ``self._get_post``, retrying each post once.
    * anything else: fetch the web index pages
      ``start_page..end_page`` from ``https://www.ptt.cc/bbs/`` and build
      a ``data_type.PostInfo`` from every list entry.

    Args:
        crawl_type: ``int`` checked against ``data_type.crawl_type``.
        post_handler: callable invoked with each post.
        board: board name; must be a non-empty ``str``.
        start_index, end_index: BBS index range (``0`` means "not given").
        start_aid, end_aid: BBS AID range, exclusive with the index range
            and with ``search_condition``.
        search_type, search_condition: BBS search filter.
        query: forwarded to ``self._get_post``.
        start_page, end_page: web page range, checked against the newest
            web index.

    Returns:
        A tuple ``(error_post_list, del_post_list)`` (the ``list``
        annotation is kept for compatibility).

    Raises:
        exceptions.Requirelogin: BBS mode and ``self._login_status`` is
            falsy.
        exceptions.HostNotSupport: web mode while the host is PTT2.
        exceptions.NoSuchBoard: a web index page did not return HTTP OK.
        ValueError: inconsistent or missing range arguments.
    """
    self._one_thread()

    self.config.log_last_value = None

    check_value.check(self.config, int, 'crawl_type', crawl_type,
                      value_class=data_type.crawl_type)
    check_value.check(self.config, str, 'Board', board)

    if len(board) == 0:
        raise ValueError(
            log.merge(self.config, [i18n.Board, i18n.ErrorParameter, board]))

    if crawl_type == data_type.crawl_type.BBS:
        if not self._login_status:
            raise exceptions.Requirelogin(i18n.Requirelogin)

        check_value.check(self.config, int, 'SearchType', search_type)
        if search_condition is not None:
            check_value.check(self.config, str, 'SearchCondition',
                              search_condition)
        if start_aid is not None:
            check_value.check(self.config, str, 'StartAID', start_aid)
        if end_aid is not None:
            check_value.check(self.config, str, 'EndAID', end_aid)

        aid_given = start_aid is not None or end_aid is not None

        if aid_given and (start_index != 0 or end_index != 0):
            raise ValueError(
                log.merge(
                    self.config,
                    ['AID', 'Index', i18n.ErrorParameter, i18n.BothInput]))

        if aid_given and search_condition is not None:
            raise ValueError(
                log.merge(self.config, [
                    'AID',
                    'SearchCondition',
                    i18n.ErrorParameter,
                    i18n.BothInput
                ]))

        if search_type == data_type.post_search_type.PUSH:
            # For a PUSH search the condition must be an integer in
            # [-100, 110].
            try:
                push_threshold = int(search_condition)
            except ValueError:
                raise ValueError(
                    log.merge(self.config, [
                        'SearchCondition',
                        i18n.ErrorParameter,
                    ]))

            if not (-100 <= push_threshold <= 110):
                raise ValueError(
                    log.merge(self.config, [
                        'SearchCondition',
                        i18n.ErrorParameter,
                    ]))

        if start_index != 0:
            newest_index = self._get_newest_index(
                data_type.index_type.BBS,
                board=board,
                search_type=search_type,
                search_condition=search_condition)
            check_value.check_index_range(self.config, 'start_index',
                                          start_index, 'end_index', end_index,
                                          max_value=newest_index)
        elif start_aid is not None and end_aid is not None:
            # Resolve the AID pair to an index range.
            start_index = self.get_post(board, post_aid=start_aid,
                                        query=True).index
            end_index = self.get_post(board, post_aid=end_aid,
                                      query=True).index
            check_value.check_index_range(self.config, 'start_index',
                                          start_index, 'end_index', end_index)
        else:
            raise ValueError(
                log.merge(self.config, [i18n.ErrorParameter, i18n.NoInput]))

        log.show_value(self.config, log.level.DEBUG, 'StartIndex',
                       start_index)
        log.show_value(self.config, log.level.DEBUG, 'EndIndex', end_index)

        error_post_list = []
        del_post_list = []

        show_progress = self.config.log_level == log.level.INFO
        if show_progress:
            progress_bar = progressbar.ProgressBar(
                max_value=end_index - start_index + 1,
                redirect_stdout=True)

        for index in range(start_index, end_index + 1):
            # Each post gets at most two attempts.
            for attempt in range(2):
                post = None
                try:
                    post = self._get_post(
                        board,
                        post_index=index,
                        search_type=search_type,
                        search_condition=search_condition,
                        query=query)
                except (exceptions.ParseError,
                        exceptions.UnknownError,
                        exceptions.NoSuchBoard,
                        exceptions.NoMatchTargetError):
                    if attempt == 1:
                        raise
                except (exceptions.ConnectionClosed,
                        exceptions.UseTooManyResources):
                    if attempt == 1:
                        raise
                    # The session was lost: log in again before retrying.
                    log.log(self.config, log.level.INFO,
                            i18n.RestoreConnection)
                    self._login(self._ID, self._Password,
                                self.config.kick_other_login)

                if post is not None and post.pass_format_check:
                    break

                log.log(self.config, log.level.DEBUG,
                        'Wait for retry repost')
                time.sleep(0.1)

            if show_progress:
                progress_bar.update(index - start_index)

            if post is None:
                error_post_list.append(index)
                continue

            if not post.pass_format_check:
                # Prefer the AID when it could be parsed.
                if post.aid is not None:
                    error_post_list.append(post.aid)
                else:
                    error_post_list.append(index)
                continue

            if post.delete_status != data_type.post_delete_status.NOT_DELETED:
                del_post_list.append(index)

            post_handler(post)

        if show_progress:
            progress_bar.finish()

        return error_post_list, del_post_list

    if self.config.host == data_type.host_type.PTT2:
        raise exceptions.HostNotSupport(lib_util.get_current_func_name())

    # Web crawler, see https://www.ptt.cc/bbs/index.html
    # 1. Get the total number of index pages.
    newest_index = self._get_newest_index(data_type.index_type.WEB,
                                          board=board)
    # 2. Check that StartPage and EndPage lie within 1..MaxPage.
    check_value.check_index_range(self.config, 'StartPage', start_page,
                                  'EndPage', end_page,
                                  max_value=newest_index)

    # 3. Parse every entry (deleted posts included) into a PostInfo.
    error_post_list = []
    del_post_list = []
    _url = 'https://www.ptt.cc/bbs/'

    show_progress = self.config.log_level == log.level.INFO
    if show_progress:
        progress_bar = progressbar.ProgressBar(
            max_value=end_page - start_page + 1,
            redirect_stdout=True)

    def deleted_post(post_title):
        """Classify a list title into a ``post_delete_status`` value."""
        if post_title.startswith('('):
            if '本文' in post_title:
                return data_type.post_delete_status.AUTHOR
            elif post_title.startswith('(已被'):
                return data_type.post_delete_status.MODERATOR
            else:
                return data_type.post_delete_status.UNKNOWN
        else:
            return data_type.post_delete_status.NOT_DELETED

    # Stop at end_page: the range was validated against it and the progress
    # bar is sized from it. The original looped up to newest_index, ignoring
    # end_page and pushing the progress bar past its max_value.
    for index in range(start_page, end_page + 1):
        log.show_value(self.config, log.level.DEBUG, 'CurrentPage', index)

        url = _url + board + '/index' + str(index) + '.html'
        r = requests.get(url, cookies={'over18': '1'})
        if r.status_code != requests.codes.ok:
            raise exceptions.NoSuchBoard(self.config, board)
        soup = BeautifulSoup(r.text, 'html.parser')

        for div in soup.select('div.r-ent'):
            web = div.select('div.title a')
            post = {
                'author': div.select('div.author')[0].text,
                'title': div.select('div.title')[0].text.strip('\n').strip(),
                'web': web[0].get('href') if web else ''
            }
            if post['title'].startswith('('):
                # NOTE(review): any title starting with '(' is treated as a
                # deleted-post placeholder; confirm regular titles cannot
                # reach the split() calls below.
                del_post_list.append(post['title'])
                if post['title'].startswith('(本文'):
                    if '[' in post['title']:
                        post['author'] = post['title'].split(
                            '[')[1].split(']')[0]
                    else:
                        post['author'] = post['title'].split(
                            '<')[1].split('>')[0]
                else:
                    post['author'] = post['title'].split('<')[1].split(
                        '>')[0]

            post = data_type.PostInfo(
                board=board,
                author=post['author'],
                title=post['title'],
                web_url='https://www.ptt.cc' + post['web'],
                delete_status=deleted_post(post['title']))
            # 4. Hand the assembled post to the handler.
            post_handler(post)

        # 5. Advance the progress bar once per page.
        if show_progress:
            progress_bar.update(index - start_page)

    log.show_value(self.config, log.level.DEBUG, 'DelPostList',
                   del_post_list)

    if show_progress:
        progress_bar.finish()

    return error_post_list, del_post_list
def get_post(self, board: str, post_aid: str = None, post_index: int = 0, search_type: int = 0, search_condition: str = None, query: bool = False) -> data_type.PostInfo:
    """Validate the arguments and fetch one post, retrying once.

    Args:
        board: board name; must be a non-empty ``str``.
        post_aid: post AID (``str``); exclusive with ``post_index`` and
            with ``search_condition``.
        post_index: post index; ``0`` means "not given".
        search_type: ``int`` checked against ``data_type.post_search_type``.
        search_condition: search text; requires a non-zero ``search_type``.
        query: forwarded to ``self._get_post``.

    Returns:
        The post from the last attempt. This can be ``None`` or a post
        whose ``pass_format_check`` is falsy when both attempts came back
        unusable without raising.

    Raises:
        exceptions.Requirelogin: ``self._login_status`` is falsy.
        ValueError: inconsistent, missing or out-of-range arguments.
    """
    self._one_thread()

    if not self._login_status:
        raise exceptions.Requirelogin(i18n.Requirelogin)

    self.config.log_last_value = None

    check_value.check(self.config, str, 'Board', board)
    if post_aid is not None:
        check_value.check(self.config, str, 'PostAID', post_aid)
    check_value.check(self.config, int, 'PostIndex', post_index)
    check_value.check(self.config, int, 'SearchType', search_type,
                      value_class=data_type.post_search_type)
    if search_condition is not None:
        check_value.check(self.config, str, 'SearchCondition',
                          search_condition)

    if len(board) == 0:
        raise ValueError(
            log.merge(self.config, [i18n.Board, i18n.ErrorParameter, board]))

    if post_index != 0 and isinstance(post_aid, str):
        raise ValueError(
            log.merge(self.config, [
                'PostIndex',
                'PostAID',
                i18n.ErrorParameter,
                i18n.BothInput
            ]))

    if post_index == 0 and post_aid is None:
        raise ValueError(
            log.merge(self.config,
                      ['PostIndex', 'PostAID', i18n.ErrorParameter]))

    if search_condition is not None and search_type == 0:
        raise ValueError(
            log.merge(self.config, [
                'SearchType',
                i18n.ErrorParameter,
            ]))

    if search_type == data_type.post_search_type.PUSH:
        # For a PUSH search the condition must be an integer in [-100, 110].
        try:
            push_threshold = int(search_condition)
        except ValueError:
            raise ValueError(
                log.merge(self.config, [
                    'SearchCondition',
                    i18n.ErrorParameter,
                ]))

        if not (-100 <= push_threshold <= 110):
            raise ValueError(
                log.merge(self.config, [
                    'SearchCondition',
                    i18n.ErrorParameter,
                ]))

    if post_aid is not None and search_condition is not None:
        raise ValueError(
            log.merge(self.config, [
                'PostAID',
                'SearchCondition',
                i18n.ErrorParameter,
                i18n.BothInput,
            ]))

    if post_index != 0:
        newest_index = self._get_newest_index(
            data_type.index_type.BBS,
            board=board,
            search_type=search_type,
            search_condition=search_condition)

        if not (1 <= post_index <= newest_index):
            raise ValueError(
                log.merge(self.config, [
                    'PostIndex',
                    i18n.ErrorParameter,
                    i18n.OutOfRange,
                ]))

    self._check_board(board)

    # Two attempts: the first failure is swallowed, the second re-raised.
    for attempt in range(2):
        post = None
        try:
            post = self._get_post(board, post_aid, post_index, search_type,
                                  search_condition, query)
        except (exceptions.ParseError,
                exceptions.UnknownError,
                exceptions.NoSuchBoard,
                exceptions.NoMatchTargetError) as e:
            if attempt == 1:
                raise e

        if post is not None and post.pass_format_check:
            break

        log.log(self.config, log.level.DEBUG, 'Wait for retry repost')
        time.sleep(0.1)

    return post