# Example #1
def scrape_libraries_services():
    """Scrape libraries and their services from the locations table, then
    write ``library.xml`` and ``library_service.xml``.

    Entries whose name begins with a space are treated as services belonging
    to the most recently seen library; all other entries are libraries.
    Appends results to the module-level ``libraries`` and ``services`` lists.
    """
    print("*************** Scraping Libraries and Services *********************")
    soup = get_soup(base_url)
    dl = soup.find("dl", {'id': 'locations-table'})
    anchor_nodes = dl.find_all("a")
    last_library = None

    for a_node in anchor_nodes:
        # NOTE(review): find_next("dt") returns the first <dt> AFTER the
        # anchor — assumes each anchor precedes its own <dt>; confirm against
        # the page markup.
        name = a_node.find_next("dt").get_text()
        link = a_node.get('href')
        if name.startswith(' '):
            # Leading space => a service under the last library seen.
            service = LibraryService(name, link)
            service.library = last_library
            populate_details(service)
            services.append(service)
        else:
            # Top-level name => a library.
            lib = Library(name, link)
            last_library = lib
            populate_details(lib)
            libraries.append(lib)

    # BUG FIX: ID generation and XML serialization were inside the loop,
    # regenerating IDs and rewriting both files once per anchor node.
    # Hoisted out so they run exactly once after scraping completes,
    # matching how scrapeCampusServices serializes after its loop.
    generate_id(libraries)
    generate_id(services)
    xml_lib_list = XmlList().from_list(libraries)
    xml_lib_list.save("library.xml")
    xml_services_list = XmlList().from_list(services)
    xml_services_list.save("library_service.xml")
def scrapeCampusServices():
    """Scrape campus-service headings and their associated link lists from
    the Dal faculty/staff page, then serialize them to
    ``campus_service.xml`` and ``web_links.xml``.
    """
    print("*************** Scraping Campus Services *********************")
    page = get_soup('https://www.dal.ca/faculty_staff.html')
    headings = page.find_all("h4", class_="c-title")
    campus_services: List[CampusService] = []
    links: List[WebLinks] = []

    # One CampusService per heading; IDs are assigned sequentially from 1.
    for current_id, heading in enumerate(headings, start=1):
        anchor = heading.find_next("h4").find_next("a")
        name = anchor.get_text()
        url = dal_prefix(anchor.get("href"))

        entry = CampusService(name, url)
        entry.id = current_id
        campus_services.append(entry)

        # Each <li> under the heading's following <ul> is a related web link.
        for item in heading.find_next("ul").find_all("li"):
            link_anchor = item.find_next("a")
            web_link = WebLinks(link_anchor.get_text(),
                                dal_prefix(link_anchor.get('href')),
                                name)
            web_link.service_id = current_id
            links.append(web_link)

    service_xml = XmlList()
    service_xml.from_list(campus_services)
    service_xml.save("campus_service.xml")

    link_xml = XmlList()
    link_xml.from_list(links)
    link_xml.save("web_links.xml")
# Example #3
def scrape_buildings():
    """Scrape building data, assign IDs, and write ``building.xml`` and
    ``campus.xml``.  Amenity XML output is currently disabled (commented
    out below), but amenity IDs are still generated.
    """
    campus_ids = dict()
    buildings = get_buildings(campus_ids)
    generate_id(buildings)

    xml_buildings = XmlList()
    # Collect the set of distinct amenity keys seen across all buildings.
    amenity_keys: Dict[str, int] = {}
    for building in buildings:
        xml_buildings.add(building.to_xml_obj())
        for amenity in building.amenities:
            amenity_keys[amenity] = 1

    build_ids_from_dict(amenity_keys)
    # save_to_file(dict_to_xml_rows(amenity_keys, "amenity"), "amenity.xml")
    save_to_file(dict_to_xml_rows(campus_ids, "campus"), "campus.xml")
    xml_buildings.save("building.xml")