# --------------------------
#      Web Scraping
# --------------------------

## News title, link scraping

from webscrap import wlog
from webscrap import wscrap

# Define log file location
# NOTE(review): the path is relative, so it presumably resolves against the
# current working directory and needs an existing `html/` folder -- confirm
# against how wlog opens the file.
wlog.set_custom_log_info('html/scratch.log')

# Build the scraper from the module-level URL constant `wscrap.url_aj`; the
# `wlog` module itself is handed over as the second argument.
news_scrap = wscrap.NewsScraper(wscrap.url_aj, wlog)

## THE FOLLOWING 2 LINES OF CODE ARE CURRENTLY ENABLED; PER THE ORIGINAL NOTE
## THEY GET THE SITE'S LATEST DATA. COMMENT THEM OUT TO PARSE DATA FROM THE
## DISK FILE RETRIEVED PREVIOUSLY INSTEAD.

news_scrap.retrieve_webpage()
news_scrap.write_webpage_as_html()

# Remaining steps run in this fixed order. Going by the method names they
# presumably load the saved HTML, wrap it in a BeautifulSoup (bs4) object and
# emit a simplified HTML rendering -- TODO confirm against wscrap.NewsScraper.
news_scrap.read_webpage_from_html()
news_scrap.convert_data_to_bs4()
# Optional debugging call, left disabled.
#news_scrap.print_beautiful_soup()
news_scrap.parse_soup_to_simple_html()
# Example no. 2
# 0
from webscrap import wlog
from webscrap import wscrap

# Configure the log file location (relative path 'html/error.log').
wlog.set_custom_log_info('html/error.log')

# NOTE(review): this snippet uses `wscrap.WebScraper`, `wscrap.url` and
# method names (`retrieve_page`, `write_data_as_html`, ...) that differ from
# the `NewsScraper` snippets in this file -- presumably it targets a different
# revision of the `webscrap` package; confirm which API is actually installed.
news_scrap = wscrap.WebScraper(wscrap.url, wlog)
# The calls below run in this fixed order on the same scraper object. Going by
# their names: fetch the page, save it as HTML, read it back, convert it to a
# bs4 object, then produce simple HTML -- TODO confirm against wscrap.
news_scrap.retrieve_page()
news_scrap.write_data_as_html()
news_scrap.read_data_as_html()
news_scrap.convert_data_to_bs4()
news_scrap.soup_parse_to_simple_html()
from webscrap import wlog
from webscrap import wscrap

# Configure the log file location (relative path "html/error.log").
wlog.set_custom_log_info("html/error.log")

# Build the scraper from the module-level URL constant `wscrap.url_aj`; the
# `wlog` module itself is handed over as the second argument.
news_scrap = wscrap.NewsScraper(wscrap.url_aj, wlog)
# Presumably fetches the live page and stores it on disk as HTML -- TODO
# confirm against wscrap.NewsScraper.
news_scrap.retrieve_webpage()
news_scrap.write_webpage_as_html()

# Presumably reloads the stored HTML, converts it to a BeautifulSoup (bs4)
# object and writes a simplified HTML rendering -- TODO confirm.
news_scrap.read_webpage_from_html()
news_scrap.convert_data_to_bs4()
news_scrap.parse_soup_to_simple_html()