import os

# These were used below without being imported — the script raised NameError
# at runtime. They are existing dependencies of this file, not new ones.
import requests
from bs4 import BeautifulSoup

# NOTE(review): django.setup() requires DJANGO_SETTINGS_MODULE to be set in
# the environment before it runs, otherwise it raises ImproperlyConfigured.
# Confirm the launcher (cron entry / shell wrapper) exports it.
import django

django.setup()

from django_crawl.models import BlogData

# Absolute path of the directory containing this script.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))


def parse_blog():
    """Scrape the PKNU job-board list page and return its posts.

    Returns:
        dict: maps each post's title (text of the ``<h4>`` inside the link)
        to a two-item list ``[href, date_text]`` taken from the same
        ``<li><a>`` entry of ``ul#board_list``.

    Raises:
        requests.RequestException: if the HTTP request fails.
        AttributeError: if the page layout changes and ``ul#board_list`` or
            the per-entry ``h4`` / ``span.date`` elements are missing.
    """
    html_url = 'http://cms.pknu.ac.kr/pknujob/view.do?no=2342'
    req = requests.get(html_url)
    soup = BeautifulSoup(req.text, 'html.parser')
    # Each board entry is an <a> directly under an <li> of ul#board_list.
    board = soup.find('ul', {'id': 'board_list'})
    entries = board.select('li > a')
    return {
        entry.find('h4').text: [
            entry.get('href'),
            entry.find('span', {'class': 'date'}).text,
        ]
        for entry in entries
    }


if __name__ == '__main__':
    blog_data_dict = parse_blog()
    for title, (link, date) in blog_data_dict.items():
        try:
            BlogData(title=title, link=link, tag='job_general', date=date).save()
        except Exception as exc:
            # Was a bare `except: pass`, which silently hid every failure
            # (including KeyboardInterrupt/SystemExit). Keep the best-effort
            # "skip duplicates / bad rows" behaviour but surface the error.
            print(f'failed to save {title!r}: {exc}')
# my_googles = soup.find_all('div', {'class':'BNeawe vvjwJb AP7Wnd'}) #case incruit req = requests.get('http://www.pknu.ac.kr/usrBoardActn.do?p_bm_idx=5&p_boardcode=PK10000005&p_sbsidx=2') html = req.text soup = BeautifulSoup(html, 'html.parser') my_incruit = soup.find_all('td', {'class':'title'}) data = {} for title in my_incruit: data[title.text] = title.get('href') return data if __name__=='__main__': blog_data_dict = parse_blog() for t, l in blog_data_dict.items(): BlogData(title=t).save() # req = requests.get('https://www.google.com/search?q=%ED%81%AC%EB%A1%A4%EB%9F%AC&rlz=1C1OKWM_enKR881KR881&oq=%ED%81%AC%EB%A1%A4%EB%9F%AC&aqs=chrome..69i57j0l4j69i61l3.2578j0j7&sourceid=chrome&ie=UTF-8') # html = req.text # soup = BeautifulSoup(html, 'html.parser') # my_googles = soup.find_all('div', {'class':'BNeawe vvjwJb AP7Wnd'}) def my_cron_job(): req = requests.get('http://www.pknu.ac.kr/usrBoardActn.do?p_bm_idx=5&p_boardcode=PK10000005&p_sbsidx=2') html = req.text soup = BeautifulSoup(html, 'html.parser') my_incruit = soup.find_all('td', {'class':'title'}) data = {} for title in my_incruit:
import os

# These were used below without being imported — the script raised NameError
# at runtime. They are existing dependencies of this file, not new ones.
import requests
from bs4 import BeautifulSoup

# NOTE(review): django.setup() requires DJANGO_SETTINGS_MODULE to be set in
# the environment before it runs, otherwise it raises ImproperlyConfigured.
# Confirm the launcher (cron entry / shell wrapper) exports it.
import django

django.setup()

from django_crawl.models import BlogData

# Absolute path of the directory containing this script.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))


def parse_blog():
    """Scrape the PKNU LINC+ board list page and return its posts.

    Returns:
        dict: maps each post's title (text of the ``<h4>`` inside the link)
        to a two-item list ``[href, date_text]`` taken from the same
        ``<li><a>`` entry of ``ul#board_list``.

    Raises:
        requests.RequestException: if the HTTP request fails.
        AttributeError: if the page layout changes and ``ul#board_list`` or
            the per-entry ``h4`` / ``span.date`` elements are missing.
    """
    html_url = 'http://cms.pknu.ac.kr/lincplus/view.do?no=9100'
    req = requests.get(html_url)
    soup = BeautifulSoup(req.text, 'html.parser')
    # Each board entry is an <a> directly under an <li> of ul#board_list.
    board = soup.find('ul', {'id': 'board_list'})
    entries = board.select('li > a')
    return {
        entry.find('h4').text: [
            entry.get('href'),
            entry.find('span', {'class': 'date'}).text,
        ]
        for entry in entries
    }


if __name__ == '__main__':
    blog_data_dict = parse_blog()
    for title, (link, date) in blog_data_dict.items():
        try:
            BlogData(title=title, link=link, tag='link', date=date).save()
        except Exception as exc:
            # Was a bare `except: pass`, which silently hid every failure
            # (including KeyboardInterrupt/SystemExit). Keep the best-effort
            # "skip duplicates / bad rows" behaviour but surface the error.
            print(f'failed to save {title!r}: {exc}')