コード例 #1
0
def scrapeCampusServices():
    """Scrape the Dalhousie faculty/staff page and save its services as XML.

    Every ``h4.c-title`` heading on the page is treated as one service and is
    numbered from 1 in document order. The ``<li>`` items of the next ``<ul>``
    after the heading become that service's web links, each tagged with the
    service's number. The services are saved through ``XmlList`` as
    ``campus_service.xml`` and the links as ``web_links.xml``.
    """
    print("*************** Scraping Campus Services *********************")
    page = get_soup('https://www.dal.ca/faculty_staff.html')
    headings = page.find_all("h4", class_="c-title")
    services: List[CampusService] = []
    links: List[WebLinks] = []

    for number, heading in enumerate(headings, start=1):
        # The link items are looked up before the service anchor; the anchor
        # is the first <a> after the next <h4> that follows this heading.
        items = heading.find_next("ul").find_all("li")
        anchor = heading.find_next("h4").find_next("a")
        service_url = dal_prefix(anchor.get("href"))
        service_name = anchor.get_text()

        entry = CampusService(service_name, service_url)
        entry.id = number
        services.append(entry)

        for item in items:
            target = item.find_next("a")
            link_url = dal_prefix(target.get('href'))
            record = WebLinks(target.get_text(), link_url, service_name)
            record.service_id = number
            links.append(record)

    # Services are written first, then the links, each through its own XmlList.
    outputs = (
        (services, "campus_service.xml"),
        (links, "web_links.xml"),
    )
    for records, filename in outputs:
        writer = XmlList()
        writer.from_list(records)
        writer.save(filename)
コード例 #2
0
def get_global_events():
    """Collect the Dalhousie events listed for every month of 2019.

    Requests the monthly events listing for months 1 through 12. For each
    ``div.h4-placeholder`` entry on a page, an ``Event`` is built from the
    entry's first ``<a>`` (stripped text and ``href``), passed through
    ``get_event_details`` and added to the result list.

    Fetching is best effort: if anything fails while a month is being
    processed, the exception is printed and the remaining entries of that
    month are skipped; events gathered so far are kept.

    Returns:
        The ``XmlList`` filled via ``from_list`` with the collected events,
        after ``generate_id`` has been called on the list.
    """
    xml_list = XmlList()
    final_list: List[Event] = list()
    base_url = 'https://www.dal.ca/news/events/_jcr_content/contentPar/eventslisting.month.html/2019-{}-01.html'
    # range() excludes its stop value, so 13 is required to reach December.
    # NOTE(review): the month is substituted without zero padding
    # (e.g. "2019-1-01") -- confirm the endpoint accepts that form.
    for month in range(1, 13):
        try:
            url = base_url.format(month)
            r = http.request('GET', url)
            soup = BeautifulSoup(r.data, features="html.parser")
            for event_node in soup.find_all("div", class_="h4-placeholder"):
                # Look the anchor up once; both the link and the name come from it.
                anchor = event_node.find("a")
                link = anchor.get("href").strip()
                name = anchor.get_text().strip()
                event_obj = Event(name, link)
                print("GET - {}".format(event_obj.name))
                event_obj = get_event_details(event_obj)
                final_list.append(event_obj)
        except Exception as e:
            # Deliberately broad: one bad month must not abort the whole run.
            print(e)
    generate_id(final_list)
    xml_list.from_list(final_list)
    return xml_list