def _crawl_transfer(year, url):
    """Store one transfer-exam ``Sheet`` for each subject link on the page at *url*.

    The page is fetched with ``get_page``. Every anchor inside the first
    ``div`` matched by ``find('div', 'clearfix')``, except the first anchor,
    becomes a ``Sheet`` whose target is the anchor's href resolved against
    *url* and whose subject is looked up (or created) from the anchor text.

    Args:
        year: Value passed through unchanged as the sheet's year.
        url: Address of the listing page; also the base for relative hrefs.

    Returns:
        None. Sheets are added to ``db.session`` and committed once after
        all links have been processed.
    """
    # Local import: the file's import block is not visible from this block.
    import logging

    soup = get_page(url)
    container = soup.find('div', 'clearfix')
    if container is None:
        # Without this guard the chained .find_all() fails with an opaque
        # AttributeError on None when the page lacks the expected element.
        logging.getLogger(__name__).warning(
            'no div.clearfix on %s; skipping transfer exams for %s', url, year)
        return

    # The exam type is the same for every link, so look it up only once.
    exam_type = get_or_create(Examtype, TRANSFER_EXAMS)

    # The first anchor is deliberately skipped, as before
    # (presumably a non-subject link -- TODO confirm against the page).
    for anchor in container.find_all('a')[1:]:
        href = anchor.get('href', '')
        if not href:
            # urljoin(url, '') would yield the listing page itself, which
            # would be stored as a bogus sheet target.
            continue
        subject = get_or_create(Subject, anchor.text)
        # Third positional argument is None here; _crawl_detail passes a
        # department in that position.
        sheet = Sheet(urljoin(url, href), year, None, subject, exam_type)
        db.session.add(sheet)

    db.session.commit()
def _crawl_detail(department_name, url):
    """Store one graduate-exam ``Sheet`` per subject link for a department.

    The page at *url* is fetched with ``get_page``. Each row after the first
    in the element matched by ``find('table', 'listview')`` is read as: the
    text of the first ``td`` is the year (converted with ``int``), and every
    anchor in the row is one subject whose href is resolved against *url*.

    Args:
        department_name: Name used to look up (or create) the ``Department``.
        url: Address of the department's listing page; also the base for
            relative hrefs.

    Returns:
        None. Sheets are added to ``db.session`` and committed once after
        all rows have been processed.

    Raises:
        ValueError: If a row that contains links has a first cell whose text
            ``int`` cannot parse.
    """
    # Local import: the file's import block is not visible from this block.
    import logging

    department = get_or_create(Department, department_name)
    soup = get_page(url)
    table = soup.find('table', 'listview')
    if table is None:
        # Without this guard the chained .find_all() fails with an opaque
        # AttributeError on None when the page lacks the expected table.
        logging.getLogger(__name__).warning(
            'no table.listview on %s; skipping department %s',
            url, department_name)
        return

    # The exam type is the same for every link, so look it up only once.
    exam_type = get_or_create(Examtype, AFTER_GRADUATE_EXAMS)

    # The first row is skipped, as before (presumably the table header --
    # TODO confirm against the page).
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        anchors = row.find_all('a')
        if not cells or not anchors:
            # No <td> means there is no year cell to index; no anchors means
            # nothing to store. Checking anchors first also keeps int() from
            # running on rows that carry no links.
            continue

        exam_year = int(cells[0].text)
        for anchor in anchors:
            href = anchor.get('href', '')
            if not href:
                # urljoin(url, '') would yield the listing page itself,
                # which would be stored as a bogus sheet target.
                continue
            subject = get_or_create(Subject, anchor.text)
            sheet = Sheet(urljoin(url, href), exam_year, department,
                          subject, exam_type)
            db.session.add(sheet)

    db.session.commit()