from typing import List

# NOTE: get_soup, dal_prefix, CampusService, WebLinks, and XmlList are
# project-local helpers defined elsewhere in the repo (see the sketch
# below for the two simplest ones).


def scrapeCampusServices():
    print("*************** Scraping Campus Services *********************")
    soup = get_soup('https://www.dal.ca/faculty_staff.html')
    service_nodes = soup.find_all("h4", class_="c-title")
    service_list: List[CampusService] = []
    web_link_list: List[WebLinks] = []
    service_id = 0
    for node in service_nodes:
        service_id += 1
        link_nodes = node.find_next("ul").find_all("li")
        # find_next("h4") steps past the current c-title heading to the
        # following <h4>, then takes the first anchor after it.
        service = node.find_next("h4").find_next("a")
        service_url = dal_prefix(service.get("href"))
        service_name = service.get_text()
        campus_service = CampusService(service_name, service_url)
        campus_service.id = service_id
        service_list.append(campus_service)
        for link_node in link_nodes:
            # First anchor inside (or after) this <li>.
            link = link_node.find_next("a")
            url = dal_prefix(link.get('href'))
            text = link.get_text()
            web_link = WebLinks(text, url, service_name)
            web_link.service_id = service_id
            web_link_list.append(web_link)
    xml_camp_service = XmlList()
    xml_camp_service.from_list(service_list)
    xml_camp_service.save("campus_service.xml")
    xml_web_links = XmlList()
    xml_web_links.from_list(web_link_list)
    xml_web_links.save("web_links.xml")
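# A minimal sketch of the two simplest helpers referenced above, assuming
# get_soup wraps a urllib3 fetch and dal_prefix absolutizes site-relative
# links. The signatures are inferred from the call sites, not taken from
# the repo's actual definitions.
import urllib3
from bs4 import BeautifulSoup

# Shared connection pool, assumed to live at module scope
# (get_global_events below also calls http.request).
http = urllib3.PoolManager()


def get_soup(url: str) -> BeautifulSoup:
    # Assumed helper: fetch a page and hand back a parsed soup.
    r = http.request('GET', url)
    return BeautifulSoup(r.data, features="html.parser")


def dal_prefix(url: str) -> str:
    # Assumed helper: turn site-relative dal.ca links into absolute URLs.
    if url and url.startswith('/'):
        return 'https://www.dal.ca' + url
    return url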
def get_global_events():
    xml_list = XmlList()
    final_list: List[Event] = []
    # NOTE: months are not zero-padded here; if the endpoint expects
    # 2019-01-01-style paths, format with '{:02d}' instead.
    base_url = 'https://www.dal.ca/news/events/_jcr_content/contentPar/eventslisting.month.html/2019-{}-01.html'
    # The original range(1, 12) stopped at November; range(1, 13) covers
    # all twelve months.
    for i in range(1, 13):
        try:
            url = base_url.format(i)
            r = http.request('GET', url)
            soup = BeautifulSoup(r.data, features="html.parser")
            dal_event_nodes = soup.find_all("div", class_="h4-placeholder")
            for event_node in dal_event_nodes:
                link = event_node.find("a").get("href").strip()
                name = event_node.find("a").get_text().strip()
                event_obj = Event(name, link)
                print("GET - {}".format(event_obj.name))
                event_obj = get_event_details(event_obj)
                final_list.append(event_obj)
        except Exception as e:
            # A failed month should not abort the whole scrape; log the
            # error and continue with the next one.
            print(e)
    generate_id(final_list)
    xml_list.from_list(final_list)
    return xml_list
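# Event, get_event_details, and generate_id are further project-local
# helpers. A minimal sketch of generate_id, assuming it mirrors the
# 1-based sequential numbering scrapeCampusServices applies by hand
# (the repo's real version may differ):
def generate_id(items) -> None:
    # Stamp each scraped object with a sequential, 1-based id in place.
    for index, item in enumerate(items, start=1):
        item.id = index


# Hypothetical driver code, assuming XmlList.save behaves as it does in
# scrapeCampusServices:
#     events = get_global_events()
#     events.save("events.xml")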