# --------------------------
# Web Scraping
# --------------------------
# Scrape news titles and links from the Al Jazeera page (url_aj).
from webscrap import wlog
from webscrap import wscrap

# Route this run's log output to a dedicated scratch log file.
wlog.set_custom_log_info('html/scratch.log')

scraper = wscrap.NewsScraper(wscrap.url_aj, wlog)

# Fetch the live page and cache it to disk; with these two calls removed,
# the script would fall back to parsing the previously cached HTML file.
scraper.retrieve_webpage()
scraper.write_webpage_as_html()

# Load the cached HTML, build the BeautifulSoup tree, and emit simple HTML.
scraper.read_webpage_from_html()
scraper.convert_data_to_bs4()
#scraper.print_beautiful_soup()
scraper.parse_soup_to_simple_html()
# Scrape the default site (wscrap.url) with the generic WebScraper.
from webscrap import wlog
from webscrap import wscrap

# Send log output to the error log for this run.
wlog.set_custom_log_info('html/error.log')

page_scraper = wscrap.WebScraper(wscrap.url, wlog)

# Fetch the page, persist it to disk, then re-read it for parsing.
page_scraper.retrieve_page()
page_scraper.write_data_as_html()
page_scraper.read_data_as_html()

# Build the BeautifulSoup tree and render it as simplified HTML.
page_scraper.convert_data_to_bs4()
page_scraper.soup_parse_to_simple_html()
# Scrape news from the Al Jazeera page (url_aj), logging to the error log.
from webscrap import wlog
from webscrap import wscrap

# Configure the log destination before any scraping begins.
wlog.set_custom_log_info("html/error.log")

aj_scraper = wscrap.NewsScraper(wscrap.url_aj, wlog)

# Download the page and cache it as an HTML file on disk.
aj_scraper.retrieve_webpage()
aj_scraper.write_webpage_as_html()

# Parse the cached HTML via BeautifulSoup into simplified HTML output.
aj_scraper.read_webpage_from_html()
aj_scraper.convert_data_to_bs4()
aj_scraper.parse_soup_to_simple_html()