def scrape_adsu(url=ADSU_URL):
    """Scrape the ADSU page and return its text with whitespace stripped out.

    The text of the element whose id is ``AutoNumber5`` is taken as a whole
    (the original author noted that the page markup is too messy for anything
    more structured) and then cleaned of spaces, tabs, carriage returns and
    doubled newlines.

    Returns a dict with a single ``"info"`` key holding the cleaned text.
    """
    container = utils.get_soup_from_url(url).find(id="AutoNumber5")
    cleaned = container.text
    # Removal order matters: dropping "\r" first can create new "\n\n" pairs,
    # so the doubled-newline pass has to come last.
    for junk in (" ", "\t", "\r", "\n\n"):
        cleaned = cleaned.replace(junk, "")
    return {"info": cleaned}
def scrape_student_office(url=STUDENT_OFFICE_URL):
    """Scrape the contact details of the student service office.

    Locates the 'AREA SCIENTIFICA' text node, moves to the sibling that
    follows its grandparent element, and reads the elements carrying the
    ``address_table_description`` class from there.

    Returns a dict with the keys ``'indirizzo'``, ``'telefono'``, ``'e-mail'``
    and ``'orari'``.

    Raises ``ValueError`` (from the unpacking) when the section does not
    contain exactly four matching elements.
    """
    page = utils.get_soup_from_url(url)
    heading = page.find(text='AREA SCIENTIFICA')
    section = heading.parent.parent.find_next_sibling()
    fields = section.find_all(class_='address_table_description')
    address, phone, email, hours = fields
    return {
        'indirizzo': address.text,
        'telefono': phone.text,
        'e-mail': email.text,
        # Insert ", " after every "13" in the hours text; presumably this
        # separates two time ranges that the page runs together.
        'orari': hours.text.strip().replace('13', '13, '),
    }
def scrape_professors(url=PROFESSORS_URL):
    """Scrape the professors table and return one dict per professor.

    Rows are found through the cells with ``colspan='2'`` inside the first
    ``<table>`` of the page: the parent of each such cell is treated as a
    professor row and must contain exactly five ``<td>`` cells (name, phone,
    e-mail, courses, plus one that is ignored).

    Each returned dict has the keys ``"nome"``, ``"telefono"``, ``"e-mail"``
    and ``"corsi"``; any value that comes out empty is replaced by
    ``"non disponibile"``.

    Raises ``ValueError`` (from the unpacking) when a row does not have
    exactly five ``<td>`` cells.
    """
    missing = "non disponibile"
    table = utils.get_soup_from_url(url).find("table")
    # Lazily yield the <td> cells of every row anchored by a colspan=2 cell.
    rows = (anchor.parent.find_all('td') for anchor in table.find_all(colspan='2'))
    return [
        {
            "nome": name.text or missing,
            "telefono": phone_cleanup(phone.text) or missing,
            "e-mail": email_soup_cleanup(email) or missing,
            "corsi": courses_cleanup(courses.text) or missing,
        }
        for name, phone, email, courses, _ in rows
    ]