def refresh(self, attempts=10):
    """Refresh the RSS feed.

    Re-fetches the contents from the RSS feed at ``self.rss_url`` and,
    on success, stores the parsed result in ``self.feed``.

    Args:
        attempts (int): maximum number of refresh attempts before
            giving up.
    """
    for _ in range(attempts):
        feed_update = feedparser.parse(self.rss_url)
        # feedparser only creates a 'status' attribute when an HTTP
        # response was actually received.
        if 'status' in feed_update:
            if feed_update.status == 304:
                # 304 Not Modified: nothing new yet, try again.
                continue
            elif feed_update.status == 200:
                self.feed = feed_update
                return
            else:
                LOGGER.error(f"feed_update: {feed_update.bozo_exception}")
    # Reached only when every attempt failed (success returns above).
    # Fix: "unnsuccessful" -> "unsuccessful" in the log message.
    LOGGER.info(f"{attempts} unsuccessful attempts at refreshing data.")
def get_race_datetime(self, html, race_type):
    """Extract date, time, track and distance of a race from its page HTML.

    Populates ``self.date_and_time`` with the keys 'date', 'time',
    'track', 'distance' and 'race_type'.

    Args:
        html: raw HTML of the race page (falsy when the fetch failed).
        race_type: label stored verbatim under the 'race_type' key.

    Returns:
        The populated ``self.date_and_time`` dict, or ``[]`` when the
        HTML is missing or lacks the expected structure.
    """
    if not html:
        LOGGER.error("Could not scrape %s because of broken HTML" % str(html))
        return []
    soup = BS(html, "html.parser")
    race_header = soup.find("section", attrs={"id": "rp-header"})
    # Fix: guard against a missing header section; the original raised
    # AttributeError on race_header.find_all when find() returned None.
    if race_header is None:
        LOGGER.warning("Could not fetch DATE and TIME from %s" % self.url)
        return []
    header_rows = race_header.find_all("tr")
    if len(header_rows) < 2:
        LOGGER.warning("Could not fetch DATE and TIME from %s" % self.url)
        return []
    # Expected shape: "<time> <weekday> <day> <month> <year>" (5 tokens).
    race_datetime = header_rows[0].h2.text.split()
    if len(race_datetime) != 5:
        LOGGER.warning("Could not fetch DATE and TIME from %s" % self.url)
        return []
    race_time = race_datetime[0]
    race_date_str = " ".join(race_datetime[1:])
    # The site uses both abbreviated ("Mon") and full ("Monday") weekday
    # names, so try both formats.
    # NOTE(review): "%-d/%-m/%y" is a glibc strftime extension (Linux /
    # macOS only); it raises ValueError on Windows — confirm platform.
    try:
        race_date = dt.datetime.strftime(
            dt.datetime.strptime(race_date_str, "%a %d %B %Y"),
            "%-d/%-m/%y")
    except ValueError:
        race_date = dt.datetime.strftime(
            dt.datetime.strptime(race_date_str, "%A %d %B %Y"),
            "%-d/%-m/%y")
    distance_of_the_race = header_rows[1].find_all("td")[-1].text
    self.date_and_time["date"] = race_date
    self.date_and_time["time"] = race_time
    date_and_time_section = soup.find("span", class_='rp-title-course-name')
    if date_and_time_section:
        self.date_and_time["track"] = date_and_time_section.text.title().strip()
    else:
        # Fall back to the page title, dropping its trailing word.
        title_text = soup.find('h1', class_='rp-title').text
        self.date_and_time["track"] = ' '.join(
            title_text.split()[:-1]).title().strip()
    # Distance text looks like "Distance: 2m 4f (Inner)"; strip the
    # label and any course-variant suffixes.
    self.date_and_time["distance"] = distance_of_the_race.split(
        ":")[-1].replace('(Inner)', '').replace(' (Old)', '').replace(
            ' (XC)', '').replace(' (New)', '').strip()
    self.date_and_time["race_type"] = race_type
    return self.date_and_time
def check_update(self, os_name: str, call: bool = False) -> None:
    """Check whether a newer application version is available.

    Downloads a zip containing ``version.txt`` from ``REPO_URL_VERSION``,
    compares the version inside it with the running ``VERSION`` and,
    when newer, asks the user whether to update (running
    ``self.update_app`` on consent, or disabling auto-updates on cancel).

    :param os_name: OS name
    :param call: True when the check was explicitly requested by the
        user (controls whether info/error dialogs are shown)
        default: False
    :return: None
    """
    version = os.path.join(path_to_version, 'version.txt')
    try:
        LOGGER.info('Клонируем version')
        # Fetch the version archive and unpack it next to the app.
        response = requests.get(REPO_URL_VERSION)
        with tempfile.TemporaryFile() as file:
            file.write(response.content)
            # ZipFile seeks within the temp file itself, so no rewind
            # is needed after the write.
            with zipfile.ZipFile(file) as fzip:
                fzip.extractall(path)
    # NOTE(review): this catches the builtin ConnectionError; requests'
    # ConnectionError is a different class unless it was imported from
    # requests.exceptions at the top of the file — confirm the import.
    except ConnectionError as error:
        LOGGER.error(f'Произошла ошибка при клонировании проекта {error}')
        if call is True:
            showerror(
                'Невозможно выполнить обновление',
                ERROR_MSG['Update_app']['Bad_connect'].format(VERSION))
        return
    # version.txt holds "<version>&<info>"; the name `file` is rebound
    # from the file object to that split list.
    with open(version, 'r', encoding='utf-8') as file:
        file = file.readline().strip().split('&')
    shutil.rmtree(path_to_version, ignore_errors=True, onerror=None)
    version = file[0].strip()
    # Numeric [major, minor, patch] for both remote and local versions.
    v_int = [int(item) for item in version.split('.')]
    version_old = [item for item in VERSION.split('.')]
    v_old_int = [int(item) for item in version_old]
    info = file[1]
    # Lexicographic "remote > local" comparison spelled out per part.
    condition_1 = v_int[0] > v_old_int[0]
    condition_2 = v_int[0] >= v_old_int[0] and v_int[1] > v_old_int[1]
    condition_3 = \
        v_int[0] >= v_old_int[0] and v_int[1] >= v_old_int[1] and \
        v_int[2] > v_old_int[2]
    # Tuple-indexing idiom: equivalent to bool(cond1 or cond2 or cond3).
    need_update = (False, True)[condition_1 or condition_2 or condition_3]
    if (call is True) and (need_update is False):
        showinfo('Обновление не требуется',
                 INFO_MSG['Update_app']['Not_need_update'].format(version))
    if need_update is True:
        # askyesnocancel: True = update, False = skip this time,
        # None = cancel auto-updates permanently.
        answer = askyesnocancel(
            'Требуется обновление',
            ASKS['Update_app']['Need_update'].format(version=version,
                                                     info=info))
        if answer is False:
            return
        if answer is None:
            LOGGER.info('Отмена автообновлений')
            update_request_db = UpdateRequestsToDB()
            update_request_db.update_table(
                tb_name=update_request_db.settings,
                update_row={'auto_update': 0})
            return
        if answer is True:
            try:
                self.update_app(os_name)
            except ConnectionError as error:
                LOGGER.error(f'Невозможно обновиться {os_name} -> {error}')
                showerror(
                    'Невозможно выполнить обновление',
                    ERROR_MSG['Update_app']['Bad_connect'].format(VERSION))
def __parsing_by_groups__(self, lbl: object, length_ids: int, ids: list,
                          progressbar: object,
                          last_parse: int) -> Union[None, List[int]]:
    """Parse VK data group-by-group via the `execute` API method.

    :param lbl: progress Label widget
    :param length_ids: len of the ``ids`` list
    :param progressbar: Progressbar widget
    :param ids: list of group ids
    :param last_parse: whether further (full) parsing is allowed
    :return: list ``[count, pk]``, or None when no valid token
    """
    url = HTTP_FOR_REQUESTS.format(method='execute')
    # Full parse (last_parse == 1) uses the heavier script / smaller
    # batch; otherwise the "easy" script with larger batches.
    code, request_count = ([PARSE_BY_GROUP_CODE,
                            11000], [EASY_PARSE_BY_GROUP_CODE,
                                     25000])[last_parse != 1]
    type_request, pk, count = NAME_PARSING['by_groups'], None, 0
    self.result, vk_params = [], {'group_id': ''}
    for i in range(length_ids):
        # Re-read the token each group in case it was refreshed.
        token = ConfigureVkApi().token
        if token is None:
            showerror('Неверный токен', ERROR_MSG['Parsing']['Bad_token'])
            return
        lbl_text = f'Прогресс: {i}/{length_ids}. Не прекращайте ' \
                   f'работу, это займёт пару минут...'
        # Walrus resets pg_value to 0 for each new group.
        configure_progress_lbl(progressbar, lbl, pg_value := 0, lbl_text)
        offset, i_response, json_error = 0, 0, 0
        vk_params['group_id'] = ids[i]
        while True:
            try:
                params = {
                    'v': VERSION_API,
                    'access_token': token,
                    'code': code.format(offset=offset, vk_params=vk_params)
                }
                response = requests.get(url, params=params)
                response = response.json()
                if response.get('execute_errors') or response.get('error'):
                    # NOTE(review): for a non-final group this `continue`
                    # retries the same request with unchanged state — a
                    # persistent API error loops indefinitely; confirm
                    # intended.
                    if i == length_ids - 1:
                        break
                    else:
                        continue
                response = response['response']
                count_id = int(response['count_id'])
                offset = int(response['offset'])
                vk_result = response['result']
                count += len(vk_result)
                self.result += vk_result
                json_error = 0
                del vk_result, response
                lbl_text = f'Прогресс: {i}/{length_ids}. Запрос: ' \
                           f'{i_response}/{count_id // request_count}. ' \
                           'Не прекращайте работу, это займёт пару ' \
                           'минут... '
                step = PROGRESSBAR_MAX / (count_id / request_count)
                pg_value += step
                configure_progress_lbl(progressbar, lbl, pg_value, lbl_text)
                if offset >= count_id:
                    # Group exhausted: flush pending rows (if any) to
                    # the DB and move on to the next group.
                    if count > 0:
                        pk = self.__update_db__(pk, self.result, last_parse,
                                                type_request)
                        del self.result
                        self.result = []
                        gc.collect()
                    break
                # Flush early once the in-memory batch is large enough.
                if len(self.result) >= COUNT_MANY_INSERT:
                    pk = self.__update_db__(pk, self.result, last_parse,
                                            type_request)
                    del self.result
                    self.result = []
                    gc.collect()
                offset += 1000
                i_response += 1
                gc.collect()
            except JSONDecodeError as error:
                LOGGER.error(f'Ошибка при парсинге по группам {error}')
                # Tolerate up to 3 consecutive decode errors, then give
                # up on the last group / move past earlier ones.
                # NOTE(review): json_error is not reset once it hits 3,
                # so the non-final-group `continue` can also retry
                # forever — confirm intended.
                if json_error == 3:
                    if i == length_ids - 1:
                        break
                    else:
                        continue
                else:
                    json_error += 1
    # Final flush of whatever is still buffered.
    if len(self.result) > 0:
        pk = self.__update_db__(pk, self.result, last_parse, type_request)
    del offset, last_parse, type_request, self.result
    gc.collect()
    return [count, pk]
class BrainForApp:
    """Configures and launches the application."""
    def __init__(self, window_preview):
        """Show the preview splash, run required startup checks and
        launch the application.

        :param window_preview: preview (splash) window object
        """
        self.logger = LOGGER('main', 'main')
        # Splash screen with the preview image while startup runs.
        png_preview_open, png_preview = self.preview_image_open()
        self.preview_image_set(png_preview_open, png_preview, window_preview)
        window_preview.update()
        # Memory tracing is used later for the out-of-memory dialog.
        tracemalloc.start()
        time.sleep(2)
        # Ensure the database exists / is initialised.
        MainDB()
        get_requests_db = GetRequestsToDB()
        settings = get_requests_db.get_records(
            tb_name=get_requests_db.settings,
            one_record=True,
            select=['first_start', 'auto_update'])
        first_start = settings['first_start']
        auto_update = settings['auto_update']
        if first_start == 1:
            # First launch: collect personal data / agreement, then
            # clear the first_start flag.
            self.logger.info('Первый запуск')
            window_preview.destroy()
            done = AdditionalWindows().person_and_agreement_data()
            if done is True:
                update_requests_db = UpdateRequestsToDB()
                update_requests_db.update_table(
                    tb_name=update_requests_db.settings,
                    update_row={'first_start': 0})
        # Remove leftover update/clone directories from prior runs.
        self.logger.warning('Очистка от лишних файлов в директории')
        list_path = os.listdir(path)
        if REPO_BRANCH_UPDATER in list_path:
            rmtree(REPO_BRANCH_UPDATER, ignore_errors=True, onerror=None)
        if REPO_BRANCH_VERSION in list_path:
            rmtree(REPO_BRANCH_VERSION, ignore_errors=True, onerror=None)
        if REPO_BRANCH_MASTER in list_path:
            rmtree(REPO_BRANCH_MASTER, ignore_errors=True, onerror=None)
        try:
            # May already be destroyed by the first-start branch above.
            self.logger.warning('Закрытие окна первью')
            window_preview.destroy()
        except TclError:
            pass
        del settings, first_start, list_path, window_preview
        gc.collect()
        # Background job runs __scheduler__ every minute.
        self.logger.info('Создание задачи scheduler')
        scheduler = BackgroundScheduler()
        scheduler.start()
        scheduler.add_job(__scheduler__, 'interval', minutes=1)
        # Local import avoids a circular import with the windows module.
        self.logger.info('Запуск приложения')
        from windows import App
        App(auto_update, OS)
        # App(...) blocks until the UI is closed.
        self.logger.info('Закрытие приложения')

    def preview_image_open(self):
        """Return the preview image as (PIL image, Tk photo image).

        NOTE(review): retries in a tight loop while preview.png is
        missing — loops forever if the file never appears; confirm
        intended.
        """
        while True:
            try:
                png_preview_open = Image.open(
                    os.path.join(path_to_dir_ico, 'preview.png'))
                png_preview = ImageTk.PhotoImage(png_preview_open)
                return png_preview_open, png_preview
            except FileNotFoundError as err:
                self.logger.error(str(err))

    @staticmethod
    def preview_image_set(png_preview_open, png_preview, window_preview):
        """Size the window to the image, centre it on screen and set
        the image as the window background."""
        x_img, y_img = png_preview_open.size
        # Centre the window on the screen.
        x = (window_preview.winfo_screenwidth() - x_img) // 2
        y = (window_preview.winfo_screenheight() - y_img) // 2
        window_preview.geometry("%ix%i+%i+%i" % (x_img, y_img, x, y))
        Label(window_preview, image=png_preview).pack(side='top')
class ConfigureVkApi:
    """Configures the tooling for VK API requests (token + VkTools)."""
    def __init__(self, ignore_existing_token: bool = False):
        """Load (or re-request) the user's access token, verify the
        donation/trial status and build the vk_api tool objects.

        :param ignore_existing_token: when True, always request a fresh
            token instead of using the one stored in the DB
        """
        self.logger = LOGGER('config_vk_api', 'vk_api')
        get_requests_db = GetRequestsToDB()
        user_data_table_value = get_requests_db.get_records(
            tb_name=get_requests_db.userdata, one_record=True,
            select=['access_token'])
        token = user_data_table_value['access_token']
        self.__additional_windows = AdditionalWindows
        if ignore_existing_token is False:
            # Only ask the user for a token when none is stored yet.
            if (token is None) or (token == DEFAULT_VALUE_FOR_BD):
                token = self.get_token()
        else:
            token = self.get_token()
        # Fix: the original used `or`, which is always True when the
        # token is None and ran check_is_donat(None); the intent is to
        # verify only a real token.
        if (token is not None) and (token != DEFAULT_VALUE_FOR_BD):
            is_donat = self.check_is_donat(token)
            if is_donat is False:
                token = None
        self.token = token
        if self.token is not None:
            vk_session = vk_api.VkApi(token=self.token)
            self.vk_tool = vk_api.tools.VkTools(vk_session)
            if ignore_existing_token is True:
                showinfo('Авторизовались', 'Вы удачно авторизовались!')
            self.logger.info('Получен vk_tool и сам токен')
        else:
            self.logger.error('vk_tool не удалось получить')
            self.vk_tool = None
        del get_requests_db, user_data_table_value

    def get_token(self) -> Union[str, None]:
        """Ask the user for an access token, validate it against the
        API and persist it in the DB.

        :return: the token string, or None when it could not be obtained
        """
        showinfo('Получение токена!', INFO_MSG['VK_API']['get_token'])
        web_open_new_tab(HTTP_GET_TOKEN)
        token = self.__additional_windows().get_token()
        token = self.preparation_final_token(token)
        if token == DEFAULT_VALUE_FOR_BD:
            LOGGER.warning(
                'При выполнении функции get_token был получен невалидный токен'
            )
            return None
        # Sanity-check the token with a cheap users.get call.
        params = {'v': VERSION_API, 'access_token': token}
        try:
            request = requests.get(
                HTTP_FOR_REQUESTS.format(method='users.get'),
                params=params).json()
        except ConnectionError:
            showerror(
                'Нет подключения',
                'Не возиожно авторизоваться, нетп подключения к интернету')
            return None
        if request.get('error'):
            showerror(
                'Авторизация не удалась',
                'Неверный токен авторизации, произошла ошибка, '
                'повторите попытку')
            return None
        update_requests_db = UpdateRequestsToDB()
        update_requests_db.update_table(tb_name=update_requests_db.userdata,
                                        update_row={'access_token': token})
        del request
        return token

    @staticmethod
    def check_is_donat(token: str) -> bool:
        """Check whether the user has paid the subscription (VK Donut),
        falling back to a time-limited free trial otherwise.

        :param token: user's access token
        :return: True when access is allowed, False otherwise
        """
        params = {
            'v': VERSION_API,
            'access_token': token,
            'owner_id': ID_GROUP_VK
        }
        try:
            request = requests.get(
                HTTP_FOR_REQUESTS.format(method='donut.isDon'),
                params=params).json()
        except ConnectionError:
            showerror(
                'Нет подключения',
                'Невозможно авторизоваться, нет подключения к интернету')
            return False
        if request.get('error'):
            showerror('Ошибка',
                      f'Произошла непредвиденная ошибка {request["error"]}')
            # Fix: the original fell through here and crashed on
            # int(None) below because 'response' is absent on errors.
            return False
        response = request.get('response')
        if int(response) == 1:
            return True
        else:
            # Not a donor: manage the free-trial window stored in the
            # settings table.
            get_requests_db = GetRequestsToDB()
            __start = GetRequestsToDB().get_records(
                select=['start_free_version'], one_record=True,
                tb_name=get_requests_db.settings)['start_free_version']
            if __start is None:
                # First trial use: record the start timestamp.
                warning = WARNING_MSG['VK_API']['is_not_donat_free']
                showwarning('Пробная версия!',
                            warning.format(min=TIME_FREE_VERSION // 60))
                start_free_version = time_now()
                update_request_db = UpdateRequestsToDB()
                update_request_db.update_table(
                    tb_name=update_request_db.settings,
                    update_row={'start_free_version': int(start_free_version)})
                return True
            else:
                time_use_free_version = ceil(time_now()) - int(__start)
                if time_use_free_version >= TIME_FREE_VERSION:
                    # Trial expired.
                    warning = WARNING_MSG['VK_API']['is_not_donat']
                    showwarning('Пробная версия!', warning)
                    return False
                else:
                    time_left = TIME_FREE_VERSION - time_use_free_version
                    warning = WARNING_MSG['VK_API']['is_not_donat_free']
                    showwarning('Пробная версия!',
                                warning.format(min=time_left // 60))
                    return True

    def preparation_final_token(self, token: str) -> str:
        """Extract the bare token from the pasted redirect URL.

        :param token: URL (or string) containing ``access_token=...``
        :return: the token, or DEFAULT_VALUE_FOR_BD when unrecognisable
        """
        token = token.split('access_token=')
        if len(token) == 2:
            token = token[1].split('&')[0]
            return token
        showwarning('Не смог распознать токен',
                    WARNING_MSG['VK_API']['non_inspected_token'])
        self.logger.warning(
            'При выполнении preparation_final_token, не смог распознать токен')
        return DEFAULT_VALUE_FOR_BD
# Application bootstrap: launch BrainForApp inside a borderless root
# window, with top-level error reporting dialogs.
master.overrideredirect(True)
error_logger = LOGGER('App', 'error')
try:
    app_brain = BrainForApp(master)
except SystemExit:
    error_logger.info('Закрытие программы')
except MemoryError as error:
    try:
        master.destroy()
    except TclError:
        pass
    # tracemalloc.get_traced_memory() returns byte counts.
    # Fix: the original divided by 1024 once (KiB) while the message
    # reports "Mib"; convert to MiB properly.
    size_now, peak = tracemalloc.get_traced_memory()
    size_now = size_now // (1024 * 1024)
    peak = peak // (1024 * 1024)
    showerror(
        'Ошибка',
        f'Недостаточно оперативной памяти!\n\nИспользуется: {size_now}Mib'
        f', В пике: {peak}Mib\n\n{error}')
    error_logger.error('Нехватка памяти: Используется - '
                       f'{size_now}Mib, В пике - {peak}Mib --> {error}')
except BaseException as error:
    # Last-resort boundary: report anything unexpected, then log it.
    try:
        master.destroy()
    except TclError:
        pass
    showerror('Ошибка', f'Произошла непредвиденная ошибка\n\n{error}')
    error_logger.error(f'{error}')
def start_scrape():
    """Scrape today's and yesterday's race data from the site.

    Returns:
        Tuple of (set of today's rows, list of yesterday's rows).
    """
    today_data_list = []
    yesterday_data_list = []
    engine = cloudscraper.create_scraper()
    site_connector = SiteConnector(URL, engine)
    yesterday_general_html = site_connector.get_html_page(site_connector.url)
    # Fix: validate the HTML *before* handing it to get_race_venues;
    # the original parsed first and checked afterwards.
    if not yesterday_general_html:
        LOGGER.error("Could not scrape because of broken HTML")
        print("Could not scrape because of broken HTML")
        exit()
    yesterday_all_race_urls = site_connector.get_race_venues(
        yesterday_general_html)
    print('\n\nToday data collection process just started!')
    today_html = site_connector.get_html_page(HOME_URL)
    today_all_race_urls = site_connector.get_race_venues(today_html, True)
    completeness = 1
    for tod_url in today_all_race_urls:
        html = site_connector.get_html_page(tod_url[0])
        if not html:
            continue
        today_instance = RaceScraper(tod_url)
        today_instance.get_race_datetime(html, tod_url[1])
        today_result = today_instance.get_race_info_before(html)
        for row in today_result:
            today_data_list.append(tuple(row))
        done = round(completeness / len(today_all_race_urls) * 100, 2)
        print('Completed {}%'.format(done))
        completeness += 1
        # Throttle requests to avoid being rate-limited.
        time.sleep(4.7)
    else:
        print('Today data collection process has finished!')
    # This part is EXTREMELY important, as it checks for duplicates,
    # so never remove it from here!
    try:
        last_row_list = db.select(
            'date', DB_TABLE_NAME,
            'WHERE id = (SELECT MAX(id) FROM %s) LIMIT 1' % DB_TABLE_NAME)
        last_date = last_row_list[0][0]
        instance = RaceScraper(URL)
        instnce_html = site_connector.get_html_page(
            yesterday_all_race_urls[0][0])
        first_race_information = instance.yesterday_result(instnce_html,
                                                           None)[0]
        if last_date in first_race_information:
            print(
                '\nATTENTION: The data might be duplicated, as You scraped the data for today ALREADY!'
            )
            db.db_to_xslx(set(today_data_list))
            exit()
    except IndexError:
        # Empty DB (last_row_list[0] missing) means no duplicate risk.
        print('Database is empty...')
    print('Yesterday data collection process just started...')
    # Fix: reset the progress counter; the original carried it over
    # from the today-loop, printing percentages above 100%.
    completeness = 1
    for yes_url in yesterday_all_race_urls:
        yesterday_instance = RaceScraper(yes_url)
        html = site_connector.get_html_page(yes_url[0])
        yesterday_result = yesterday_instance.yesterday_result(
            html, yes_url[1])
        if yesterday_result:
            yesterday_data_list.extend(yesterday_result)
        done = round(completeness / len(yesterday_all_race_urls) * 100, 2)
        print('Completed {}%'.format(done))
        completeness += 1
        time.sleep(4.7)
    else:
        print('Yesterday data collection process has finished!')
    return set(today_data_list), yesterday_data_list