#!/usr/bin/env python
"""Debug helper: print the index held by the project's ``Progress`` object.

The script creates a ``Progress`` instance, calls its ``load()`` method and
prints the resulting ``current_index`` attribute.  ``Progress`` comes from
the project-local ``models`` module; what ``load()`` reads from is defined
there, not here.

The commented-out statements below are earlier manual checks that are kept
for reference only and are never executed.
"""
import re

import requests
from bs4 import BeautifulSoup

from models import User, UserPool, Article, ArticlePool, Progress

# Disabled manual check: fetch one board index page (the "over18" cookie is
# sent with the request) and list the link found in each "title" element.
#page = requests.get("https://www.ptt.cc/bbs/Gossiping/index20373.html", cookies={"over18":"1"})
#bs = BeautifulSoup(page.content, "html.parser")
#print(list(div.find("a") for div in list(bs.find_all(class_="title"))))

# Disabled manual check: load the article pool (its size is printed by the
# disabled print further down).
#ap = ArticlePool()
#ap.load()

# Active part of the script: load the progress state and show its index.
prg = Progress()
prg.load()
#print(len(ap.articles))
print(prg.current_index)
page.status_code, url) log_error_message(error_message) return round_trip = page.elapsed.total_seconds() request_time_total += round_trip request_counter += 1 print("[Info]: Time: {}s \t Counter: {} \t Average: {}s".format( round_trip, request_counter, request_time_total / request_counter)) return BeautifulSoup(page.content, "html.parser") progress = Progress() user_pool = UserPool() article_pool = ArticlePool() progress.load() user_pool.load() article_pool.load() for index_url in url_generator(progress): print("[Info]: ".ljust(60, "=")) print("[Index Page]: {}{}".format(ROOT_URL, index_url)) index_html = retrive_html_from_url(index_url) for article_item_div in index_html.find_all(class_="title"): article_item_a = article_item_div.find("a") if not article_item_a: error_message = "[Error]: On Page: {}\n" \ "[Error]: Unable Parse Item {}\n".format(index_url, article_item_div) log_error_message(error_message) continue